From a6aacdb159fc99b066e8a917da562e8e0c9ef3be Mon Sep 17 00:00:00 2001 From: Xuehan Date: Thu, 3 Jul 2025 18:58:04 +0000 Subject: [PATCH 01/59] ini Signed-off-by: Jialei Chen --- README_Jialei_runs.md | 24 +++ .../configs/grpo_unique_numbers_gemma1b.yaml | 43 ++++ .../configs/grpo_unique_numbers_llama8b.yaml | 38 ++++ examples/run_grpo_unique_numbers.py | 192 ++++++++++++++++++ image.png | Bin 0 -> 20259 bytes .../ray_actor_environment_registry.py | 1 + .../environments/simulated_user/__init__.py | 0 .../simulated_user/unique_numbers.py | 141 +++++++++++++ nemo_rl/experience/rollouts.py | 32 ++- .../test_unique_numbers_environment.py | 43 ++++ 10 files changed, 505 insertions(+), 9 deletions(-) create mode 100644 README_Jialei_runs.md create mode 100644 examples/configs/grpo_unique_numbers_gemma1b.yaml create mode 100644 examples/configs/grpo_unique_numbers_llama8b.yaml create mode 100644 examples/run_grpo_unique_numbers.py create mode 100644 image.png create mode 100644 nemo_rl/environments/simulated_user/__init__.py create mode 100644 nemo_rl/environments/simulated_user/unique_numbers.py create mode 100644 tests/unit/environments/test_unique_numbers_environment.py diff --git a/README_Jialei_runs.md b/README_Jialei_runs.md new file mode 100644 index 0000000000..5ec6b4ef34 --- /dev/null +++ b/README_Jialei_runs.md @@ -0,0 +1,24 @@ +## What I have run so far + +- 07/01: successfully run sliding_puzzle example: +``` +uv run python examples/run_grpo_sliding_puzzle.py logger.wandb_enabled=True grpo.val_at_start=True policy.train_micro_batch_size=1 cluster.gpus_per_node=8 +``` + +- 07/02: run with new dummy retreival simulator: +Task: env generate and hide a list of integers, LLM agent need to guess how many unique integers are in the list. +Allow LLM to ask for the list of integers by index one by one. 
+ +``` +uv run python examples/run_grpo_unique_numbers.py +``` +or with llama-8b +``` +uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_unique_numbers_llama8b.yaml +``` + +Seems there is some chat render/format issue with gemma-1b, leading to low quality in the beginning. Llama-8b seems to be better? + +![alt text](image.png) + +see full wandb metrics [here](https://wandb.ai/jialeichen777-google/grpo-simulated-retrieval/reports/Dummy-retrival-task-for-llama-8b-and-gemma-1b--VmlldzoxMzQ0OTgyMw) \ No newline at end of file diff --git a/examples/configs/grpo_unique_numbers_gemma1b.yaml b/examples/configs/grpo_unique_numbers_gemma1b.yaml new file mode 100644 index 0000000000..638647590a --- /dev/null +++ b/examples/configs/grpo_unique_numbers_gemma1b.yaml @@ -0,0 +1,43 @@ +# GRPO configuration for unique numbers environment +defaults: "grpo_math_1B.yaml" + +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 16 + max_rollout_turns: 20 + max_num_steps: 100 + val_at_start: true + +data: + add_system_prompt: false + +checkpointing: + enabled: false + checkpoint_dir: "results/grpo-unique-numbers" + metric_name: "val_reward" + higher_is_better: true + keep_top_k: 3 + save_period: 10 + +env: + unique_numbers: + cfg: + max_turns: 15 + min_length: 5 + max_length: 10 + max_integer: 15 + +logger: + wandb_enabled: True + wandb: + project: "grpo-simulated-retrieval" + name: "gemma-1b-__NOW__" + +policy: + train_micro_batch_size: 1 + model_name: google/gemma-3-1b-it + tokenizer: + name: google/gemma-3-1b-it + +cluster: + gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/grpo_unique_numbers_llama8b.yaml b/examples/configs/grpo_unique_numbers_llama8b.yaml new file mode 100644 index 0000000000..8cdd63bcdd --- /dev/null +++ b/examples/configs/grpo_unique_numbers_llama8b.yaml @@ -0,0 +1,38 @@ +# GRPO configuration for unique numbers environment +defaults: "grpo_math_8B.yaml" + +grpo: + num_prompts_per_step: 32 + 
num_generations_per_prompt: 16 + max_rollout_turns: 20 + max_num_steps: 100 + val_at_start: true + +data: + add_system_prompt: false + +checkpointing: + enabled: false + checkpoint_dir: "results/grpo-unique-numbers" + metric_name: "val_reward" + higher_is_better: true + keep_top_k: 3 + save_period: 10 + +env: + unique_numbers: + cfg: + max_turns: 15 + min_length: 5 + max_length: 10 + max_integer: 15 + +logger: + wandb_enabled: True + wandb: + project: "grpo-simulated-retrieval" + name: "llama-8b-__NOW__" + + +cluster: + gpus_per_node: 8 \ No newline at end of file diff --git a/examples/run_grpo_unique_numbers.py b/examples/run_grpo_unique_numbers.py new file mode 100644 index 0000000000..892d37142f --- /dev/null +++ b/examples/run_grpo_unique_numbers.py @@ -0,0 +1,192 @@ +import argparse +import itertools +import os +import pprint +import random +from datetime import datetime, timedelta +from typing import Iterator + +from omegaconf import OmegaConf +from torch.utils.data import IterableDataset +from transformers import AutoTokenizer + +from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.simulated_user.unique_numbers import ( + UniqueNumbersEnv, + UniqueNumbersMetadata, +) +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + +PROMPT = ( + "I will play a game with you. I have a list of integers in mind and can NOT tell you. " + "Your goal is to guess the count of UNIQUE numbers in my list. The only 2 things you can do is the following: " + "You can either ask me 'what is number k?' 
to get the number at position k in my list, " + "or answer 'there are m unique numbers' whenever you feel you want to make a guess." + "Please do not say anything else. You cannot ask me to provide the list of integers." +) + + +def parse_args(): + parser = argparse.ArgumentParser(description="Run GRPO with unique numbers simulator") + parser.add_argument("--config", type=str, default=None, help="Path to YAML config file") + args, overrides = parser.parse_known_args() + return args, overrides + + +def generate_datum(tokenizer: AutoTokenizer, env_cfg: dict, task_name: str, idx: int, add_system_prompt: bool) -> DatumSpec: + formatted_prompt = tokenizer.apply_chat_template( + [{"role": "user", "content": PROMPT}], + tokenize=False, + add_system_prompt=add_system_prompt, + add_generation_prompt=True, + add_special_tokens=False, + ).strip() + token_ids = tokenizer(formatted_prompt, return_tensors="pt", add_special_tokens=False)["input_ids"][0] + + def _generate_numbers(min_length, max_length, max_integer, default_max_turns) -> UniqueNumbersMetadata: + length = random.randint(min_length, max_length) + numbers = [random.randint(0, max_integer) for _ in range(length)] + return UniqueNumbersMetadata( + numbers=numbers, + unique_count=len(set(numbers)), + turn=0, + max_turns=default_max_turns, + ) + + metadata = _generate_numbers( + min_length=env_cfg["cfg"]["min_length"], + max_length=env_cfg["cfg"]["max_length"], + max_integer=env_cfg["cfg"]["max_integer"], + default_max_turns=env_cfg["cfg"]["max_turns"], + ) + + message_log: LLMMessageLogType = [ + {"role": "user", "content": formatted_prompt, "token_ids": token_ids} + ] + return { + "message_log": message_log, + "length": len(token_ids), + "extra_env_info": metadata, + "loss_multiplier": 1.0, + "idx": idx, + "task_name": task_name, + } + + +class IterableNumbersDataset(IterableDataset): + def __init__(self, tokenizer, env_cfg, task_name, add_system_prompt, length): + super().__init__() + self.tokenizer = tokenizer + 
self.env_cfg = env_cfg + self.task_name = task_name + self.add_system_prompt = add_system_prompt + self.length = length + + def __iter__(self) -> Iterator[DatumSpec]: + for i in itertools.count(): + yield generate_datum( + tokenizer=self.tokenizer, + env_cfg=self.env_cfg, + task_name=self.task_name, + idx=i, + add_system_prompt=self.add_system_prompt, + ) + + def __len__(self): + return self.length + + +def setup_data(tokenizer, env_cfg, task_name, length, val_length, add_system_prompt): + env_config = env_cfg[task_name] + env = UniqueNumbersEnv.options(num_gpus=0).remote(cfg=dict(env_config["cfg"])) + task_to_env = {task_name: env} + + train_ds = IterableNumbersDataset( + tokenizer=tokenizer, + env_cfg=env_config, + task_name=task_name, + add_system_prompt=add_system_prompt, + length=length, + ) + val_ds = IterableNumbersDataset( + tokenizer=tokenizer, + env_cfg=env_config, + task_name=task_name, + add_system_prompt=add_system_prompt, + length=val_length, + ) + val_task_to_env = task_to_env + return train_ds, val_ds, task_to_env, val_task_to_env + + +def main(): + args, overrides = parse_args() + if not args.config: + args.config = os.path.join(os.path.dirname(__file__), "configs", "grpo_unique_numbers_gemma1b.yaml") + config = load_config(args.config) + if overrides: + config = parse_hydra_overrides(config, overrides) + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + + now_pst = datetime.utcnow() + timedelta(hours=-7) + config["logger"]["wandb"]["name"] = config["logger"]["wandb"]["name"].replace("__NOW__", now_pst.strftime("%m/%d-%H:%M")) + + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + if config["checkpointing"]["enabled"]: + print(f"\U0001F4CA Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}") + + pprint.pprint(config) + + init_ray() + + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + config["policy"]["generation"] = 
configure_generation_config(config["policy"]["generation"], tokenizer) + + ds_length = config["grpo"]["num_prompts_per_step"] * config["grpo"]["num_generations_per_prompt"] * config["grpo"]["max_num_steps"] + dataset, val_dataset, task_to_env, val_task_to_env = setup_data( + tokenizer=tokenizer, + env_cfg=config["env"], + task_name="unique_numbers", + length=ds_length, + val_length=config["grpo"]["max_val_samples"], + add_system_prompt=config["data"]["add_system_prompt"], + ) + + ( + policy, + policy_generation, + cluster, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + grpo_state, + master_config, + ) = setup(config, tokenizer, dataset, val_dataset) + + grpo_train( + policy, + policy_generation, + dataloader, + val_dataloader, + tokenizer, + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + grpo_state, + master_config, + ) + + +if __name__ == "__main__": + main() diff --git a/image.png b/image.png new file mode 100644 index 0000000000000000000000000000000000000000..8c71e34373da9c530f3a34138c02f5b68157f18b GIT binary patch literal 20259 zcmeFZg?Ie z6_cK!jlL0+in$<<`! 
zNkwh!jYv6|UNgNW7eFK>CFQd>H0Jpr_V#ae;2S@=se^+p4>Pl~von+P8zvij6J{1} zZf@q+tjw&ej6exSu&cF$o(rQjnBu9Cf7%f<0vp(y**cinSd%`stEX?{=)g}-{@Brf z{+`BZ(_jL z%Kwii|LyT#HP!xGll}j!`L8Gc|C-8RBYROBE1*jUf&Z?|-){f?@NY#v=Es@;ixN*{ z{&N?gvj8F=^MBS%0MYUF3v(zaA*lCa!b&dC2Pp_HN)t0(OZ)C3I55zCOwjK^N;s2~Ye@ajP`k-OlgKKI}(}m?l@>1Qm2hSz&VG7OHp7Hnaq);$V7nuwx&v(tp zU!S1heg0hUV9G*89Ua*xCMOF-Erp>#LVqs5C?R8Gs*TM}o+^e=?L&r5!sjT2?-Udi zWWS+7|80wO_y)wo%eyn2AzDrrk;$ZeX!%XF-`zWX7zPar`th=r@Cmn`wBV1dw?ZL` ztK;>52m9wflvSg{1A2H#882wX^z<&=iDB+6${NAUL`3Oa!`=rCo_@s0H9sV*B zG<-etxcu{f2KgBZ#NYiKGywh7Dzq-V(6DDlAnV`qXoeutZD9^lQbrngo=9*rvP;PDMTiu`dL2J1KJ>W;(R8lzKvldeIVJh`{ekqKY7 zhJENpgds&k1bxg1YNN>r4@95VRS*W-)PGa)IxQ<6j8yd`Uz_G(S}YInx`y0rm(!Qz zB-uO=H{A^)Y6^6vIIA?$_tD0?X=)NMUX7)hU2mukBXbVq9?UthCG=>tpxv;Rau}u%M9L3FW$u4CV|ZnYH;>7|#C`4cd10zNF9w-yxR|dh^{d>ofZ!x?db*r7@_z!_{@; z)^=_WKV9i6H2mP{$;?2kGqWFj3c#B*|IJ~GqjOe_iXk@_tKxZA^&WUYZ z7XFmi)RZE?Ln}*o6OH!i@{+5!XE4WV&RO1isrj^m8P)*dv|1Z*By) z1LE&Rzf#Gj7nU^osd$zrx(H}JvqnO)%1wYxaGZ!ey~4FHn|aTo?8;%qlo?`@LW>$n zRG5KphY9Yosu@n6nHm8zH;Scs)W6Lk64a4edidFNdQhV60BKAzz3#+SY?;?mzwm6i zW%net7GX-}2{^1hQ|ay7WA0<)DeP#o`ank28^ zbFYyiXV4*ZSs@q-ufVRgzH_VBJ3nJxoFEBHkn=XVYH-*MMn7$}@nWo)K!i33(|G3W zi2L~^#a6Y+V6F5N(cl@g<~db3e^Zp#U7v0@ou-CHaIr4VK!#{wcWsz|cfqNnZb>eq zW=+8u=GIb~0ksSl?e4g-zPE5_|0RRP2uji37-On`_hF{mlb8Qwd(Gh;eCPH^ zmf?Ixdr$+Vil!#@R*Sl=_A}kR&FcsyWrd{Q44=6V*fh@!eg%7yZDZ@-Ya_Td{`jEW z-P`;9q{)3EBRPwN!xAq$H@ApT5Dyji*~6nHVb$5pdz z9nt=>+7)-oQ#PAatX3T{W7S`>o3?Zz)!WQyeVmP~B|u9X z!qjAlUwn0BQmHpah0p24kY8=K*ChZih5=~8SK%Dj<`k{^tvJ{51RSH3V@z?!x9?Qp z1iw&Kp4o1oyD9XmT8O@N3*_UoT~6nit@b{j`D(g1m3`VfI%Rs*a5SQM#88v%s9J-O z6-(kNh7E@@-J73sS<6*_nTeF2SDT>YWkh={Qt!O`#^7xE50*1ruqbCTZSe+V>u?$8l;5-FG(yE=Wz+ zvwJ6vxd@SOrKMjqUEE7v>o4%IsNP4-o!e$ITgk`jfG3pgBOb=sAFy#69A7BoSCgWJ z03s-wYj^vN3@-T~Qd`!`nTg;h4m&TokLo%#pOQ(mt~o6!oM zY;`#xcsU;;k4l=Ldz_TYjk#{6TJ=q7FYsLq=zQ|#e;_|LcTBZ#dU3&cNt?p&iot(X zle^JY-gtkvvl2ucL%y3Zb)2C3o1~y!;Oy-WkmY5_rqtBJ)yzbv|E4L2a=sK(@VTUuqi!u1p9M4;re6wx9#;H%e2~*?Q 
zb`ir^?ld5rF5bgat92>EYnav!$dZWs5U2L>*JOc#ttw~3hAWFsDB7>eO17@d>a}0L zsLDRHgtK;z`{epEy6JqoUtjaCrf&;7kPYq{X1zHr3sZ$FkeMer$1oL@8L+D>=VNCV zCtxJ;3=4~N?zM@34_>w)nBMytS%6H$-sAQpD`co}p2TCBGh(5Q1zzkGz}{jgygWo^py{fVsilfe za7Jt3);9O8*>U^m0WqT0TLR(MC4e>VsXku|lfuKVNAj`5Yv^fV5?>#XuhEIhkfKq6 ze$X&UKgU>jLchV5fWYb|_C6*41MWzr0kGI^NIHW22mJYVQ$Yzw?jK7WJ%R2ve*muh zkIE0fcRh0hoF2}oy$%#}FZ0KHV~4KJ2s5-?*9!E_uN#$|gd3LjJj;XQ&l%i$N(Z+0 zUyp@kKb-Ax%X7omQuuUD&W{Ld3}bLLn^6+o-7@wHY)STVZ_&*R${tTv80@JG2fGs= zRWGCv9Vp%Jm*$n-T1fJqy})<7IijJXiaQr*3C4H!G*b2IX!6eZ4n291J!PXSrxn*8 zVzrmII?;K1j*ZWEG!-LYPOdJ^s8r^_l7e=1DZaB*r$=U#!oE-<;5b~{;9oG+_?G2; zetBc)RH14%w{@h)Eyl#rv=D{&3Fo(QkMz&y_j^-@)>b){w-u(_#VxT1V1}(ygRvSo zS{};i4@@+#4w_pAb5(8Daw(NA+Pd_U`(s!xL?m0x%RM@+)+BTlT9y`zR6A%(4rAr= zSMFr6zQ(@2BTkTGnb~pJm+Q+r+bbdIS`n#3T@*PMEgHJ0@u&*pnJX^cx0=y<%kJ%| zde0;@7+IRpVK=$A)v=cr=EnIYJ0oD!SoV8AZ@t%BmAT+atDC%)1!Sz0WTBbNM#Pd` z<(=7s5Ti6jsblNihu;UYruNu2@d$8a?*YvJr9JR%QUvk$^J{$b(!z6uUn>{)vLgsR zY)(BAm0Kh&=@cYQLauL`B|dH5Co@n#YpBbVhCe5vrM1aBAnXbg$Z71ep_Xb0aS}Yk zq;uIO(l7M9vDq4aDOZwp9=H~hz?mF4f9tKJt0Knx@H&mE`G@H3VLim+qYQgc&*-Fx zRiZ#`LA(tIp{T`%&d4EdNJsc*MEA6YwC98ML9B>Fx#fZj@2ffsX(9b)blm$jDON-d zrx~iI!*gLYbR25&(HW2j z*nS@g;&6H$DB0Z|(YT(NO5(klRtcKj$t$6dvLJ9I9II&X;Nf>*~J)A-o`T1u4z%$^tiyd z^R8uY!#)R3eduj3 zzh7~_kNyyd?Uo!l)N-_h8giQy0QFY!LCQL#S+Qiw%3`sfw?|2FCz+~f!;_hx#(!UN zWJrzY3@);VgUh&RhSV#6Kgc_J_*2$2+>XY1AcyKm#7WD@I2fIp5}2}uRYnc zXtsRLM%sG>ZdfCYlXL~87BGboRxC% zY>NP+11;vMs*9_Kuc`hCV#nsn3R{OJIHR;v_rWO5BOB2_f}IyNi%OWC(9hO;o%St8jmA2r zW1xG54el%yQ|)?oA)T&h(m1Pow%$+IzEjxu`ZZ$h78oxnpD_2dlBJkeRBq zJrAzt#>mSatglocEiF;IPuE|v%O>cyGY7iFhz2Zb&=JrLE%^1uos)M>vB*=sCT30o zNgc0WIym&~eUP4kq>CJYNfZuCt+BSYVMdx;G?JO)6Pu^shUB7VP2W#m+);nVdsnRK zq&VvDkt9z1m1faVP;LH&1GcJipVhAT8Vwy&;O^4ZD9%!2S-(*eyBt&QxKxMZSMSh9 zjsousRwIPHhf6`#*G}H0Hkn%=QwdlrZ5P^&Wbg12$HF;i^q#}t zF&WR;4DauhD(ol^?r%i!8!kn2_j3$_XfjIA`uJF!(T>YF*kZ$6Ka#=se3F>UOu9Jw z9wR(yVC6cPUi_(ezd-pRXO_`IjMs~zb3)}kCvVBF%uCil9(?nFAK#3S_s!+dInj4= zWvUCKZK`|kPxpq9c%Eai$ 
zy*|M*M_*OBDMh6{I&Ro#l7;-Tp)oke>cvnwTOqj%k0=+voqmwN$uLklOTx`gm*^Bz z+^sy0fk5_pM#m-LHt&5$jf0)X!dOP1J1OK2B){6zV~N0Kg4Fr~YoA`R!0;?mdpX>q z!=dZbL&j0#iIkOH-I9X3RF8;<5!zWrL(`fXePLY1QX6wOKHhb z6n7CxM1(bJ3oc^P4=yq7Dhc=nqK4&N^&Ma_tG!Czv`p#yx_xV>T6!f$;g~pUn2*`l zobF;#MPu8loi zZGhC93fYS1#ka_=X(ew&{qRv-{F{kv?qxp{XYK)r+W9gk{pf0{m!jII<*tvvjeqei zb@P4lta+g!pXNHlZEyI1=DDpYY)wXK?YH5;HQnM&=@CJcWI;NP!8l9RpdbD`0fk=~ zcdl(X6qk@nvVJvX6QBK5ZrW#jc^0;{aiw-TQ^4%;+iKUeq^8Q(*+n4b<%HFp)?k+A zth9c_;u&*|8B*7}p?v=c6K*Mo%t6K0D;`>5BDY4PJq{|0*j=6b7-?SmJV%SVVg~mQ zA97NeB4^4eS8^g`<#PPl?2r)^OgP=;n+zK{fZ66|FX^W}X7+}vx6u+>^={Wy-5K&L#ZbU}Bf;ySPT#Wg zYPG%@KU+$3{zaaAi+7qgaVhv`raw8RXnSQ1ucA+i3NS88-X$|tFX&*%r*iddaWrH^pMe7t}4gf1ISgD ze?zi(aHs0TPuI2U^5}6N1HRf6CN>D>`;AMj3P1vQ`(K#`q z9fQH>u6<(?+s+iR)ry0M5^SXPW~?#XZ`F{7y-y8p6Fm{dnoLc^b2!@6n>TZLRpx@8 z6_+ab_I{NZ4?JlO?trl zxR3!i-_EaFDMn~2Kd=<-J{bxu3I!c{`LWu3W7;a$l)89N_DZi#f>N#J*Jb<)zQDAY zU0ZAli*v!P64NDb84~|S=CM=m8Ua&v`HayZCsqN;43}Ndi8N^@Z`HWmL;X%TbE-;J<7)~!Aw6=()iI! zM4Zp;>z-ha1f)WmF?TfP16mgI=nq@3ChUnD@zFPY(zsqp4YX*ydVVV8?UNl?W!rBZAso&fvmR3}j;@$++YibeVYgwXgL?4YD(5D&f3p<2)siw4X)%m{aMn->;N4vJ|E7c|@ zGYy2J(t1hmcxi_|AWHi%4pXKa(P;(Jh2wuVM~jo3->PqA#_h^5$`HPuTi{bl3LXec%}LZ%z|Qd6Cs~$8ivR)U8bJTap66xex(HY zo{)+$=^T?lZ(<|1q&rAC*rgNj9YMv;M$jc@HE82~ynr001FqIY--e5_$L;X_NnntT z8_Tm8PB^(7Br-7_W3r(t=K9*bIZ~Z~SE6*~aD- zvsXwW9RnE~Ies70vP|+TSxFHo47-$W>G&pz=53KsWZw@r5S#kC^qtH&Xi{2#o)avQ0F3s=ai*AOcYM7)rjv-xLgs`RbG))h5b`ZTL zyP0}h9^?j#5Ep;zM(d`1ca2cgqxKR&dCa2}s zj7)l`&i)CExdM>fXeU7#hUtR(hiB$GEuZwk0X1!5%3Jf(hISOTT19k%9z%+4+Czj%D%xE^(*ws0^Jr;mB=LjushJ1E_ovj51DxFA%P%M zChQV+DzaXW1&QOEDNZB{9$75{?Kp=qlk6A|uZ-fHgnp&oTDt-Uk}>K4R+|>3>MYUu zU_x&vN_GESzpVy?>+g~oHQ0Q0W3(zYB#mcs`o%&>-}VyM-F%xbyyG#D0=FmoIt#q6 z2#xNRC0kt82+Q|^@ng^L#b8dR@;y~6{ft3?yir7pe7!yW(oN8h zj~J%EWhMhe8ARf$RcJrnTZU=qEqw+5*yz`JzN6x(h%RVM1*z+4e{U4INSSKmEj<`! 
zQIXgg{^>_RcC2-n3B9b^=|1H$0j92>PSdTwuhz<Z};Hn?X%(8v|i#Xu)XQAW11HT0z03lJ+0N2exA8&_kWS z;C=frh8Q99;JxTO{n)E24w`+9&Bzv5W~e>qg%JFk4JtUTC&V6PATD{S(BK0`^_GXi(taD4q~5;Jv^_+V_15>IrpY_@oEz4`;0WZD3d&%5Sj~#{HkL zSUJ!f8LxvW*WWOg0}y9(E^8(J`_-QTXs#QD2p{Edf7&3C^NG^f*oksnSJx&;^DdMB z^iv@3^%tSJMT+rLGlb(f*oQBKaE=V9Y~D(4^F1EP9d>j&U)hVCMOB8|-IRv8AH5#I zuVEyMhywUFGVNsBicSiwZr-nKmD8zte^ez>f3~Rbo0~mZjKpt|eerJx@UNk9j3m&9 z8rN8BL<+^!tca3|{B^9jQ^eB*>15@8bEjo(H}Jr1O+1$h3(*L34%6KT$>}(5r=49< zJ+E}Vt3nmNz*Qed`ZJSssn*zFu(8}TrJB{R(FG_~6McpL+E(uR8(rsp-wh(0g;NfQ z#L8I48;=QkukWSfTWWKsh$&V{x8KT(5j|1Q8i59VOIlJ)-S$jt*P5P{Mj@nWmPe&< zHm^E)F!N}zc*r%g!)C$R$w6Lo23nGkGdT+e=LPmDyT#<_=0ImmVxlBub0FvRlr5gf zbt9IOmXk9mMdXI_+&a?W&Tv}UKI!FuvwaC1P@bgG0Oi(U)!G4k6ak*>f% zKB_@z$!NFC%;KzT*fgb|%#8*AqdOoo*u78XE=?^_MkU@|6HZ)QLP1H3fijt+dv4b) zdFiz3nDw!w|u9t{M_~rj6R`r%FDwp&z8y z*X=3eswqAl0uQsxvok&A*u z$D`(kucHX9C2KRsB|&+wC>yWIG~~PYf%6%w$n6TxL^$+RINBnpatiYzrc3Z3&aB#k zw9%cUvFVNU@Eg|AcNXsPTo!z9;IH4lhcv6OCJ0-A{OakiO9j+vpvq9D~MoFl{#$h?1gi6tey{-zFbr$ho)(Uj-RFNNT7i=|}m*|uhMO%q#|3Vwqktlde z-0&^|SM?4#yI9XpNB(taXjskYYNuS3IKxz$pm)T&yhi;el@cwPFdW7gluV}d@cyr$ z4{%?5AD?%SUp(_s$y1aoNl+y(^$b)U=mL9Dp3t>0Z;8KLga7SfXRDQsg3cLH)?yxt zVSO}MUd~F#z-GKySXZa6u{kRa-AGDKMz%OCu-v8oqBEpdB-tk`3hpMJxwL#MU1h@J zm_B-Hkn^A9!`A$JNJxh^PDM*>Q);L zh&;t-E&jDPbdX>6-t^l;IBzZ*DIN7=!mhhjsOWgS7gt#>4dor**hNgyUuzAE7T%aL z!uwPC%46HBR-XB5(4Ry=jtht>d3vpej_G8rlo#L4(v@urKm?Xv<)_Oi+Vvw&)@GvD zYr?y2M-XWcXMoHf&N2oPk+H+(<1_T56*7w$+={!wDUx|H%i!e6@+|{piQ3?1l_-96 z3*%N{yJg0TVPZ}2vP~W)ApEc4Gw;wvTaOQ{qfHgc;Aw#{lnbSkd7rDu`_rm}bMc}o zMaXkXf=JizKlS+?$&E}^YtgUIx*S{)Fov&6wlvzYp#P+1VfJw;?a`n9J*-~16$vIngm^o?&1FQ%kpF!v z<_H`E8ax)Dhtl7z@sTe3LHW-=!xIDaoeQAL4zZaGpF^*~3w4JZgd_rDv!iqo8V24+ z=p%wqH*6n)39Rosz*2~eiGC~Sp5mj@UB}X>CfW2ldx{|FIUqIs`5qMXh3ItGfZO7n+h^k&|Y81@@#DHIUE3tDj6t2q+Czm_g5Rja9T` zTSCWE1dv~nPljK!@qor`V_m`rMMbm4#+jJZRO6|e>S`yqqo&C1Ih%{_%WrR(Guqll zoLp_8fUE-{gqkdRZHo{%gN6|*G$`M0vX2SrM+=(5;^La7KmFYEz8exSYF~3KI8F^X zy-G?(h0$qE$6&)28Ct&UN0SiO~O_2+9IQ&@N( 
z9}4JpP}BGCF;sG?LM$vSY+fE@jGEQSvva&OOeK3*-@bWnH$tfTI9}j8guV;Q8XT1E z;#k20xrO^+*ikOeqv6AbBl9*E+Ea(d~2G9S|1sc%Pzj*Y#66$FnqOp)&2;5wj^ggtnrR{NEikiKTt z{;)*cN?;#g1TkXY2m?4;$!%TB^H!amMm@j95xr_mlj`vS;|keao*w<3@BaRv9j7#| z;}^TKK7|DlJL3l71tleRl2S4Ej-lyE-NtbttN%5 zAV2?vQ<2m3fY*rlR^O08B)@2bQ|wrGbh~o-VYT-|v+OYdqF7~_2Wa1rlc)O7tGZqH zY_+dQMqWyRpoCgYUph^cd*iC@TjUD^p5ZZ^*YkjZAy&MD`5s^~F`RT~Mt-@h< zH~QXU{*k%Hgcar3&=I2B+A!vgTIx%DUio7-@!<~6U*p)U z@sD#kKdtyJCZ+tWYmfKI3I^JMgRHFZZe|R>kwD*K_Mn8aT(Mi`o(xoeD0ec6b!>N? z?p=e5u3<$O*Cl@AJn1}p$}8d|O}gz{A8JdS2EUEqy8jU=Tc*7<{tAQ@@PaIzdfl?q zjFIa=AWaDY;Yf7-o8JTMo2_BRy}1WQ_RSZ8Lm32PMTZtY3EeI}i?G9mae0rgauibz zKBTh`&09>d7gx0-{BYU-z1&I)lNwxGlWULVN-0!8d{7m>r=eiiq|8@EKcsBk$*RTm z+z>_kXFCq4kuu#RoOdvco|CGfQ|U*A6W7UiO~vd&Z$n2yribAI!RhNrpVVN;*}7@? z%`8kwm|#T4ia}x6;^Ayy!y?Mp`l}GEy}1$y6Uwr^GxYNjUkYFrC7ZuonZ9)L9eiOD zcDBr~mr^};(hVg<_^E38V}dLl8?nmfd+=WJD6Qntb%bHG>Ka-Jcu5x5ClVdaf#hd~ z+7SG9NNpDTq095LfSe1X+=0E`T?JRt%TQ3Y28@TcscJsNG3HKCZbxHy*4}q^s^PF| zs(h`D*E;^h@YyVVHfTCJ{NzWzOY_VmQb{jZ=j`F2rx)B*-_DH!-d^FW7oO(iCv!Fm z=jC|#-3G3&8YJp-aW86qSE6&u{`_DS6sZ4G7*o#io8!#7b3 zEOM(DY}2e~Fsy2P@fgV8Q&m5kH*p-8wU#H!Nv(9OomD+EADck--TMvJ zXjXr&f4DF?x^i=DG>+}w3t_J5WN8tc3#tzXR4`{Ch4~2DFs8lS!fvAZzSD$0S3@@` zpS-T0d0}E+GvC#;@rVV(E29meb!--cYB%!1qMvmZOtW~Sd6amMxs`fYZRyBv6SIWD@zRy4p#bkG8w8$PBIAY*rn>K*3i_VpSP(UC%QMY#j^u|4?5@i zqO+P@ceqpGUR7vIZTrMR$&{|#8ljDKOsk!1QsJVWIDhW5!fx=5_Or7}A4z8{yV`N= zmzKgpw^+QSK0d_I{OG7L>?N~V&HY5rq_YhyDATJ}u`!Fg304X&-a6DFGc>yW)aG}) zB3i301ptf-2{cG3Jq!xe6!z>K1kF!|>d#&RM>1JQj6JkkNnUClJnjYpXjAY(zZAPG zHI%7FUNfz_(_Al&BZ42U#IMjzYeRw$UW^r5^qJ>CVpXLpm?b+WXSC6^f|j3uAi_#Q z0*+C)`HRy5UGDuI*zEc=pUw7Tsx~pBbA{>b=or2{4p0J28UWSo7m&O$#*+DFk0LH1F;;7rY$E9BN}|k=bPl;Ozs6Q7t>w$+|*^jwgRvEfs*8AtJS!HcioqNlDp8gI$DF zvV5O@MKDI)DCVY0+zP`p_{yV!8PU78va8G<-5P-JYM2_RR=E51>(_}AtE4D2o4GF8 zj|iqDQhUXm&91WswR8NG0UnQ3Se9Z~Z2cgNZ0-pbr zZv7!{_jA7IouwvC2zMlE5#(K?(>@mwGM^EZfYl z+0X!>mj|?qy1Q~*WH*~8fQ-%AU@-IpZ!j7@;xYHAl^1CC$mN;#p}hOAd2~{oc5aNH 
zERW8>%ePQW&P%n=#Pg$YAzu%j)V74}xHHg!IRGJa9JgSUbd4Tj!O9koSPL2_*hyEK z2>jU>02dSism^eF-L+<_YQb0U_vU3dqd|W>aLdo2)wXq75d(`P`$Zj(lPE-M=ZHd< z@g5VTJSd=$7C5b&KF}q15FVcE)cM<^?J;d!sFf2Y5b-*Z-NFpAsV^xblkEyIR&;Yq zR4hC{5)>lfz)}zqff-0-Z>J?f7nAItA>W$VU-|eAQd{n5J*y~fHg?qd`iar-fQ9Bh zZE~m2%QHn{RI^2uNpzqB?6YcXMGFf#c}*ZmMwMhtCi3*1PR}5Cb2orxg?w|?F2J!Q z+t>4vnyD7d=^Dbja^(?{*dA_SA3sU>1SY(@P# zZC5xlw^4x|{u=#t|Ax+AyK4zB__F^oyxiw?D`r;9BO*{L6g8wj>s`>wI^NJ{nhmI! zV@z^4Oe-=GN~NTv-18`xj?dNZa=R{$@6-foFS$LVk$p^Jj{#+7X1Wz?2O{tw(Q9te z{cfJwXQ-xINU{Qw`r>EKMgdb+YZLXCe|6Pd{z2FQmiIkgXd>H}#3W6>vTyw6n3`K5 ziZ&>H&1dj05CJOrWBYb=fOr{S7;e45JpKHc&D_+qvMS|C6!FjW3=9BXWOa4Mwd=FJ zy%-!x^p*3^b;Ryxc^X<;gxTL;?HLIZ{&8Cip(5aICK3Pdl4qpdIMC$y9UAzcTaP6< z?)mwjwN`@(L~s-SNpnRr3wWo7rI?#!r*yqzEzQh!P~46?r`82Zt-UW-y3A(Eu}=?b z+}FKcg*Le{gxB1*B`C4j5D!9!t+cs1jBo;M_mS*h@sr^w4t7pXslH^~U-=(ds}RZ} zRf7U8WBd4-nWv(zVU$BKzyi9pjdue?yiPS0>8LL2QqD6`&;RZc-_g+zWoZ0MCH{Kg z?Sjd5!!7O)S)E!b^y7d%a#dDo>!$a)!Oq9=Fal-tk7VG22}G*5lR+v?Yy+6OY2-uL z%IPTDQ^4ROgD+D{ZP{5S2eR)wEbyT~OHB$aPW$^8mw%3_cu@lTF4ASDbC5Y~g|T6} z-SapJhq9%D=*VgcyVYmxJqpfJ<&{2s!6QD=o(1P0EuJdV~Ez&@M_mG-6J zs%V;ZL~5LNT8wGnv;^Rd0RR>9YLUthuw!&(s5WYv#*3gjFh5W5pV+|zaFpgE3W)3R zjRt6C_&b)o`}oI8?+cvCxrnaHijruP(&rP-E&|J)kONHps23h#lTNvRl>74+hfYIW zWVnyF@g6-w&wAeJ)*T|y0YW&#pAQ%o=|{lHQx7LyX1@UYjy%l1rk&^FGp_v~Q5g+1 zV{NLxQBT0d^Zv)6V>&6~qhNp$gYqkZowZBl`{4CX94??sp_k{P7K-(8GC(&#!br)_ z9h!Bux)R^90j9;dMP$-fj~m@54ZxevKEk^Wm9RF}G@0DW<3Bd{nH25VFCuo2zvc7q z#q-oRV#trf8diisr{7kqTO5$0%wDsckNU1(D( z$b^HhVqy$;r*&m=hNLz)oNSr0`&$xREYR7jsuQ&)gFr(F4h{)Mo3%)rzWH0zQJ8>U z`i#)g*@;jJb|^i4o$hDN;Vf@yIYnDHW?Z3+HGd`mcyVd#va^+cQNDV`mCgRJrD(pS zO1btmCsnp*_0vZ_-#B(=`QFZ&5SFutn2_UQiSVbD_uV!v6Do=qiLZa24!yIpqoi(z zts$W8=?R&9n3joNR-qhJX9bWT2L;xubyOwx# zhN9+|m4&RXt(hJz-Z``*PdP00THAlzaO3p3jVi)vzsfLILU`J@u)aQsZsz8QGAV9M zj5@cVW4;T@c&&GClQms;Tn@?Rf;4e9Zeb*|X+P{24D9)P^NJwE%6@H+gNA|$#DLFB z2luHD4{@9LO^1C^ag%eyyc>K@P6n*SB4%iFeCN>j>p>LCKZ45?1{&AlP{^YZLe8nn zWBiT%8|x7MhLl3gaWqs*4bh1?s(COIh~CDu?J8vpiqUZ5+YAbGSTsueMygvd4IvOSg 
zl10MpcB@p{78WhG?ezcMWUTrlB9oY`&8h)NpYfQoo4AG(rP9uW3U8bYdTjK?qx}jp zxKz%3#9;ilzi{{gI{cl~6!Ar9F>t%73r_+14-q~K6-Aq9R2VZ_Eow>U=jXH8ELuv+ z$$<;J9`0c)^+eGD9v&`If0I0lK5_%Hoa6=s$jv8ToJXSj1c6>u3||};r)Op|R(|}+ z&t^9ItOe``Q<{?~`ll#V*ovPC!!X9%OT-zrK3scUQ%P*~zvNQ0I`V7eEed-oV7j0E**704q5wFZ8&K^J zwSUR5&B8K*g=@k{OhhF9@}=4M@UW=`E8xD)osDoWAFHv6Q#|hNHGdhOl(ZHT2)_iM z*J;aF0vK7%%6=c7c2bi_tBEiP*Xzj3thViut4gmsDsDlqZFx!~|} zv368JS?0KU7+~^`gx6mb^`Uu6 zJopF?9G7zUQ3&A{j0>E~K8Jc0`lO0a6QCFzNYF+4JwZYGPXq8G13(O1(Jbz-R@Mt> z($L2?0i6w4DfRL1$ToFgM4g_x_CfOrI3zO$fS0Iz_Y>S2rUzsS@y9MC13n*rhQBN% z3B`0_{aduZ0ovd2sb)H0RV)m=&Wqi@4KsDQNg|Mc_oNuN9btY9YrlZ*_#2`EItEb! zlpOpa)c+5*cr?NAcI|clhkU5m^Y~x9CaH@8^7;`qEobNNmT#X%wTSk*U*uoDk>T&} zO-4tvZFsA)iYz%d)(yuzFYcmb1@G8s4`si(vB2mrhW)lPe;PkA`fKT7DM?AQlXV0D zAc(GY;eK$v5-_(p`e8Ajz>;O!8v~}{;Ys`zzQ%WxUr{l}BxghP8_D2uOHS|G#U%W4 zJ>vR%vaq*oT3ktxj*12rXB<++V6UWN!wfR(w;SCuA&if z*}mf_P5;Q|=#X&0^E0k~#&Xii>tAk=1&6L~K%sZhm|L;;)UL0;%B`%e<;vx13a&n3 z9B%}?E}Nc*9)7Dq;ZuEqTz_Z*sFq+Tg32ar51iA41k=#43Y^G@h|Is{$JwX<%@bm! 
zukY9Az%T6lP5o;t^XL4Vtw_Tcf(C;_i4;!^=OdJd0SHA`$9kuL>wiR#_rFBXPIEku zehn1V3*X1z0w^gc$RQJb-g9*luj?gRL2 zo15Spw9RJ{x$TN{-s=4Hk675aHrbtFX{2G<|3g6SR18gy1R{&a#7{*Md;CTx`z`jy zSUL*<6J~2yg?{GvoU;qP#Wx}Q(5nUCiysZ@vQ8?2dL z{wC8KGiwK>Y$01WoxQcg9cN$!vq!$RTYbsqWa$gyaNKq6NLxfigDjFqLjC~%WrQC_ z&6YEW^$+)H2*N|JbQ+6s_>UHwBCk(-e9{{T;sARFdlLcS_%Iao3R#~Rt)C0wal3+$ zp~E`KXGdw9e$CUQA`W7q>R@BX4$oE*_y-Wv=Y(>O4S9Q^9%Cw&Hl0@=>=a!=Y)EyY z?rtHa!8;q!m5zHH=9ZS3eatOte3_mq!>0;!XzdIRR@pzROFIaM65!^#)A=8Hcv%qU zwqp#?#b!NUs5izB18`HRb*$sWO;Xrpo68)A(-pVp!PLFa8!uN29S*8ghlevh?X;t+ zjwQ0|2Gg$Wv=s=F<%ZxgHL7Y{q6*&Pu2MI+Fsl6sSk%-dA;mz0K>7YMdz6=?@CaJA zKW9!0b??LkVmmDwIjrcM%r`l$r(^5x-I;Nc{7&3r#{kTfHU&iNXN^Sv%SBI@CII*~quN6t~7> z~K^)-Z z+17WX2;5(wsm{R3N4^&#MflCf`B2imJCWA+%$iD(V8LlFyd2@yzQQE!t|mchNp$IE z(c|Sq8&YX#{CA^ZaR;3G)zF<)$4wR08|pLTIQm4Ke@!z(EhVLIQ1=smC^ zHOSr^`z#5}%+E{SU2j|-Gej%1dq&9tm5j8W*uU+Y`;gy+I+eQO7*meE^ zeLk)4VHnyX^atPX>u*JJY;UZo#@$>e+F32!FQOs|tJ2S09oPUNz%KWhJI{%9uSek% zj0V#vDe#p<2h|tt#IFP!ociV;y$GEJ|NG5e28M{nFz*y|_VeSP?lYnr_M*H``uA3x z^!u|E|Aes_aK4PkI61esZ)l9$&yne9YfDR<(y&Sad;`mYXib6=1%s|s+;(z2iF8g~ zN8)n7!C`4Z_pVw6A(Xlc#vSHHN{kA>a(q_1<6=2rphRraVPHX=bwWcy ztOY{gkxA&9WFy`83aTx}~p4!a@Fz#|~gAk;hYuV$Tr=Ku@Y2+8NNL zhufr2DDnULT*OI%5U0J4K;>xcQhU%Pvf*NWKDL#&YhmPAfRT|A`>6ARIB5>!GR--x zYp-K*k`nM3qP|4@N5KAm{85mlgmm%0Q(0yQRP9u^{1Zfh0{KM(Bf9(rVti80P|&4- zb>8$g@DI`iHb0sNFv6dlRHo>EfCrFKA_Mpj*jcgu0Vm6kKMK;!A{_R(V}PsG0*35L z|C`PK*UGtnC4Gl+ylhQN>&#kSGFm2TnzJ;q$lKJknc^ig7^!4AFJa;&!vxbj4J$|5 zQp{9P^OBZYDl8CV#e$Mxt&pDsZd0wCA{k)&& zsnA>2t}^uTF^otDIwai**94&4k2+{UVi~OXhK;@b&FTPXp%%W^UnV&7(2Z^0fc&jZ zu$_RA`Ynj0at~@O1aP-3@jz7of&Q8npl8thhgH4BIW`bTfw;FA_Wpf5<)m@H(#}Y~ z9N-Qa}g8=Q)QGpB6FPWG`&^HkMhs&Yl~6jd{d z(bx+Ki{XIiRVB}I%JUn7r%W7K2XZG|ooiDB-gQwRk@N%xY5cE2R4{?ys+MV1?`WsL zEOMV3;CK37oS*$eR5LPi_*`Nld2sGOPfFZC<$-v>n@DnvKnOl9bfPQc7HNlKU)x=Z zNY4JLhwG8ysFud!c+3NR~PrU0iaE>{rV7XAuX0MyGHZm^C)gAgN`j@gFxB7bS3Mz zd+q7X(=`%INu!^YrXDS$ z4T5D(Dh0P-eG%p&y#Ny!Sl!~>_Q1gr3q+}YpiXgA+7ZnqZbVY-0;Jk}Ex3D#xr-fY 
z+~4mYk5PMQe)2`7)56kj0a8qD5Y@mzXeAy!@@Q2fVBBBdkJOP4F3BSS70BH$Ez(b z=RA)>XHR~^vS#D{BItJOKRIz)zp1=DWT#{7qp;|3%B(coWpYrYV=2G%L zl;0?Q7EBonc4d4$78Dee5@C8AyjI+Sh7Q$5!&OQN6^6=ao2<-!k(Qq9`$6MjWSW*d z%u4k6BEK>Tg)XoBX#tj^*SXK)hnZ?n~HBC)!$*bAI61-xr z#?XSe9JtHNUvAr5QBi;9mvNZ0!t$PpTN?QMTZCQhyz!pzES^}iJ=|2IqSbvuoGACD zB-`nt)MVk~tLYQL`;qgULcZlG&y}e)jp`o*TGG7V3bqf|+KCyA^y_dv?7Mi#KB z+g_*-dZ|!~F0ySItK6e6dPc=Jq!_~4W?L$(f$0o+E#P6do<*kXG=3!BV%p+df~WhZ zH_dYY+pP+JW#p%p^2&26%DY)@Rs}1MhldY~Hk!aICRq z@0-9sb3{vq?2?j%g*QLsJTb!D;)D0u5&j<5l!W&cS@6FP$KXP@pHmy0zytNW0hKHx zfE7s;%2v8CQR8v%A>k_Surs?B~GmtxlmZ_yha~- z5A6yjf5{&Lh5DJ1)QNig54Ok0Qf>-%uS#04NIaYNMkH^Oc^23=&umEhu^mCgciBU~ z(impmG2=Kxp~FKPEh?QOd*}9u-=-K@Tw^GV*J3$ceJJcz%_Z5T!{j9G-34(J9}i85 z`|u-C`Cn> tuple[dict[str, str], float, bool, None, Optional[UniqueNumbersMetadata]]: + turn = metadata["turn"] + max_turns = metadata["max_turns"] + + if turn >= max_turns: + # Out of turns + return {"role": "user", "content": ""}, PENALTY_FOR_NO_GUESS, True, None, None + + last_msg = "" + if message_log and message_log[-1]["role"] == "assistant": + last_msg = message_log[-1]["content"].strip() + + if not last_msg: + # no last message from assistant, assuming done + return {"role": "user", "content": ""}, PENALTY_FOR_NO_GUESS, True, None, None + + query_match = self.query_re.search(last_msg) + if query_match: + k = int(query_match.group(1)) + if 1 <= k <= len(metadata["numbers"]): + content = str(metadata["numbers"][k - 1]) + else: + content = f"Invalid index! There are {len(metadata['numbers'])} numbers." 
+ next_meta = { + "numbers": metadata["numbers"], + "unique_count": metadata["unique_count"], + "turn": turn + 1, + "max_turns": max_turns, + } + return {"role": "user", "content": content}, PENALTY_FOR_EVERY_ASK, False, None, next_meta + + guess_match = self.guess_re.search(last_msg) + if guess_match: + m = int(guess_match.group(1)) + reward = 1.0 if m == metadata["unique_count"] else PENALTY_FOR_INCORRECT_GUESS + return {"role": "user", "content": ""}, reward, True, None, None + + # default response + next_meta = { + "numbers": metadata["numbers"], + "unique_count": metadata["unique_count"], + "turn": turn + 1, + "max_turns": max_turns, + } + help_msg = "Please ask 'what is number k?' or say 'there are m unique numbers'." + return {"role": "user", "content": help_msg}, PENALTY_FOR_INCORRECT_FORMAT, False, None, next_meta + + +@ray.remote +class UniqueNumbersEnv(EnvironmentInterface): + """Environment where the LLM must deduce the count of unique numbers.""" + + def __init__(self, cfg: Optional[UniqueNumbersConfig] = None): + cfg = cfg or {} + self.min_length = cfg.get("min_length", 3) + self.max_length = cfg.get("max_length", 7) + self.default_max_turns = cfg.get("max_turns", 10) + self.runner = _UniqueNumbersRunner() + + def step( + self, + message_log_batch: list[LLMMessageLogType], + metadata_batch: list[Optional[UniqueNumbersMetadata]], + ) -> EnvironmentReturn: + results = [] + for log, meta in zip(message_log_batch, metadata_batch): + assert meta is not None, "Metadata must not be None for UniqueNumbersEnv." + assert meta["numbers"] is not None, "Numbers must not be None in metadata." + assert meta["unique_count"] > 0, "Unique count must be greater than 0 in metadata." 
+ results.append(self.runner.process_turn(log, meta)) + + observations, rewards, terminateds, stop_strings, next_metadata = [], [], [], [], [] + for obs, rew, term, stops, meta in results: + observations.append(obs) + rewards.append(rew) + terminateds.append(term) + stop_strings.append(stops) + next_metadata.append(meta) + + return EnvironmentReturn( + observations=observations, + metadata=next_metadata, + next_stop_strings=stop_strings, + rewards=torch.tensor(rewards, dtype=torch.float32), + terminateds=torch.tensor(terminateds, dtype=torch.bool), + ) + + def shutdown(self) -> None: # pragma: no cover + pass + + def global_post_process_and_metrics( + self, batch: BatchedDataDict + ) -> tuple[BatchedDataDict, dict]: + final_rewards = batch.get( + "total_reward", torch.tensor([0.0] * len(batch["idx"])) + ) + avg_reward = final_rewards.mean().item() if len(final_rewards) > 0 else 0.0 + return batch, {"unique_numbers_avg_reward": avg_reward} diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index 1e0a027bcb..963fa3f73a 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -405,12 +405,18 @@ def run_multi_turn_rollout( truncation_mask = torch.zeros_like(env_output.terminateds, dtype=torch.bool) for i, global_idx in enumerate(active_indices.tolist()): env_obs_content = env_output.observations[i]["content"] - # Tokenize the raw content from the environment - # TODO @sahilj: handle if we want these subsequent messages to have a chat template - tokenized_obs = tokenizer( - env_obs_content, return_tensors="pt", add_special_tokens=False - ).input_ids[0] - + # Tokenize the raw content from the environment into chat format if needed + env_role = env_output.observations[i]["role"].lower() + if env_role in {"user", "assistant", "system"}: + formatted_obs = tokenizer.apply_chat_template( + [{"role": env_role, "content": env_obs_content}], + tokenize=False, + add_special_tokens=False, + add_generation_prompt=False, + 
).strip() + tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] + else: + tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] # check if new message overflows max_seq_len if ( len(tokenized_obs) + len(generated_ids[i]) + active_input_lengths[i] @@ -660,9 +666,17 @@ async def run_sample_multi_turn_rollout( terminated = env_output.terminateds[0].item() env_obs_content = env_output.observations[0]["content"] # Tokenize environment response - tokenized_obs = tokenizer( - env_obs_content, return_tensors="pt", add_special_tokens=False - ).input_ids[0] + env_role = env_output.observations[0]["role"].lower() + if env_role in {"user", "assistant", "system"}: + formatted_obs = tokenizer.apply_chat_template( + [{"role": env_role, "content": env_obs_content}], + tokenize=False, + add_special_tokens=False, + add_generation_prompt=False, + ).strip() + tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] + else: + tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] # Check for sequence length overflow if input_lengths + gen_token_count + len(tokenized_obs) >= max_seq_len: diff --git a/tests/unit/environments/test_unique_numbers_environment.py b/tests/unit/environments/test_unique_numbers_environment.py new file mode 100644 index 0000000000..2ee01166b4 --- /dev/null +++ b/tests/unit/environments/test_unique_numbers_environment.py @@ -0,0 +1,43 @@ +import os +import time + +import pytest +import ray + +from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env +from nemo_rl.environments.simulated_user.unique_numbers import ( + UniqueNumbersEnv, + UniqueNumbersMetadata, +) + + +@pytest.fixture(scope="module") +def unique_env(): + env = UniqueNumbersEnv.options( + runtime_env={ + "py_executable": get_actor_python_env( + 
"nemo_rl.environments.simulated_user.unique_numbers.UniqueNumbersEnv" + ), + "env_vars": dict(os.environ), + } + ).remote(cfg={"max_turns": 5, "min_length": 3, "max_length": 3}) + yield env + env.shutdown.remote() + ray.kill(env) + time.sleep(0.1) + + +def test_query_and_reward(unique_env): + metadata = UniqueNumbersMetadata(numbers=[1, 2, 1], unique_count=2, turn=0, max_turns=5) + query_log = [[{"role": "assistant", "content": "what is number 2?"}]] + result = ray.get(unique_env.step.remote(query_log, [metadata])) + + assert result.observations[0]["content"] == "2" + assert result.rewards[0] == 0.0 + assert result.terminateds[0] is False + + guess_meta = UniqueNumbersMetadata(numbers=[1, 2, 1], unique_count=2, turn=3, max_turns=5) + guess_log = [[{"role": "assistant", "content": "there are 2 unique numbers"}]] + guess_result = ray.get(unique_env.step.remote(guess_log, [guess_meta])) + assert guess_result.terminateds[0] is True + assert guess_result.rewards[0] == 1.0 From eba5e76ff4e56ae1f97c628e0fe4caf2c414be26 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Wed, 2 Jul 2025 10:55:46 -0700 Subject: [PATCH 02/59] fix: load HF model only on rank 0 (#544) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- examples/configs/eval.yaml | 9 +++ nemo_rl/models/generation/vllm.py | 4 ++ .../models/policy/dtensor_policy_worker.py | 61 +++++++++++++++++-- .../unit/models/policy/test_dtensor_worker.py | 7 +++ 4 files changed, 75 insertions(+), 6 deletions(-) diff --git a/examples/configs/eval.yaml b/examples/configs/eval.yaml index 0308f65ed6..e880d98bc7 100644 --- a/examples/configs/eval.yaml +++ b/examples/configs/eval.yaml @@ -22,6 +22,15 @@ generation: pipeline_parallel_size: 1 gpu_memory_utilization: 0.9 max_model_len: 2048 + colocated: + # true: generation shares training GPUs + # false: uses dedicated generation resources + enabled: true + # only relevant when enabled is false + resources: + gpus_per_node: null # Decides num gpus to be dedicated to generation 
when there is one node in the cluster i.e cluster.num_nodes == 1 + num_nodes: null # Decides number of nodes to be dedicated to generation + tokenizer: name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index cc8b44d5f3..9506a063d3 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -316,6 +316,10 @@ def _patch_vllm_init_workers_ray(): os.environ["VLLM_USE_V1"] = os.environ.get("NRL_VLLM_USE_V1", "1") os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1" + if not self.cfg["colocated"]["enabled"]: + os.environ["NCCL_SHM_DISABLE"] = "1" + os.environ["NCCL_P2P_DISABLE"] = "1" + load_format = self.cfg["vllm_cfg"]["load_format"] if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(self.model_name): load_format = "auto" diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index a5e1d9259d..46e1e8a52a 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -21,7 +21,12 @@ import ray import torch +from accelerate import init_empty_weights from torch import nn +from torch.distributed.checkpoint.state_dict import ( + StateDictOptions, + set_model_state_dict, +) from torch.distributed.fsdp import ( FSDPModule, ) @@ -30,7 +35,7 @@ from torch.distributed.tensor.experimental._attention import ( set_rotate_method, ) -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer from transformers.integrations.accelerate import find_tied_parameters from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM @@ -137,6 +142,15 @@ def __init__( init_reference_model: bool = True, **kwargs: Any, ): + # Disable NCCL SHM if training and generation are not co-located: https://github.com/NVIDIA-NeMo/RL/issues/564 + if ( + 
"generation" in config + and config["generation"] is not None + and not config["generation"]["colocated"]["enabled"] + ): + os.environ["NCCL_SHM_DISABLE"] = "1" + os.environ["NCCL_P2P_DISABLE"] = "1" + self.cfg = config # torch distributed init. Envars for rank, world_size, and master_addr and master_port are set from the ray remote call torch.distributed.init_process_group(backend="nccl") @@ -156,19 +170,38 @@ def __init__( else: raise ValueError(f"Unknown precision: {self.cfg['precision']}") - print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") - self.model = AutoModelForCausalLM.from_pretrained( + model_config = AutoConfig.from_pretrained( model_name, - device_map="cpu", # load weights onto CPU initially # Always load the model in float32 to keep master weights in float32. # Keeping the master weights in lower precision has shown to cause issues with convergence. - # https://github.com/NVIDIA-NeMo/RL/issues/279 will fix the issue of CPU OOM for larger models. torch_dtype=torch.float32, trust_remote_code=True, **sliding_window_overwrite( model_name ), # due to https://github.com/huggingface/transformers/issues/38002 ) + + full_state_dict = None + if self.rank == 0: + print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") + model = AutoModelForCausalLM.from_pretrained( + model_name, + device_map="cpu", # load weights onto CPU initially + trust_remote_code=True, + config=model_config, + ) + full_state_dict = model.state_dict() + del model + + print(f"[Rank {self.rank}] Initializing empty model for FSDP...") + # All ranks initialize model on meta device, so FSDP can shard it. + # The actual weights will be broadcast from rank 0. 
+ + with init_empty_weights(): + self.model = AutoModelForCausalLM.from_config( + model_config, + ) + # caching since this property is not always preserved after FSDP self.num_tied_weights = len(find_tied_parameters(self.model)) self.skip_tie_check = os.environ.get( @@ -222,8 +255,24 @@ def __init__( custom_parallel_plan=self.cfg["dtensor_cfg"]["custom_parallel_plan"], ) + print(f"[Rank {self.rank}] Loading state dict from rank 0...") + # This will broadcast the state dict from rank 0 to all other ranks + # and load it into the FSDP model. + set_model_state_dict( + self.model, + model_state_dict=full_state_dict, + options=StateDictOptions( + full_state_dict=True, + broadcast_from_rank0=True, + ), + ) + + # Manually broadcast buffers + for _, buf in self.model.named_buffers(): + torch.distributed.broadcast(buf, src=0) + if self.cpu_offload: - self.model = self.move_buffer_to_device(self.model, "cpu") + self.model = self.move_to_device(self.model, "cpu") # used for streaming update inference engine weights self._held_sharded_state_dict_reference: Optional[dict[str, torch.Tensor]] = ( diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index 0a42ea1e9f..91bf140641 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -61,6 +61,13 @@ def create_test_config( "top_k": None, "stop_token_ids": None, "stop_strings": None, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, }, "dtensor_cfg": { "enabled": True, From e7905564f6358d46b50c2023e53f230b786744e6 Mon Sep 17 00:00:00 2001 From: Xuehan Xiong Date: Wed, 2 Jul 2025 15:36:59 -0700 Subject: [PATCH 03/59] feat: supports evaluation of multiple-choice benchmarks (#559) Signed-off-by: Xuehan Xiong Signed-off-by: Xuehan Signed-off-by: Xuehan Xiong xxman@google.com Signed-off-by: Yuki Huang Signed-off-by: Parth Chadha Signed-off-by: KiddoZhu 
Signed-off-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Signed-off-by: Luis Vega Signed-off-by: Terry Kong Signed-off-by: Luis Vega <2478335+vegaluisjose@users.noreply.github.com> Signed-off-by: Yi-Fu Wu Signed-off-by: ashors1 Signed-off-by: Terry Kong Signed-off-by: Anna Shors Signed-off-by: Dheeraj Peri Signed-off-by: Sahil Jain Signed-off-by: Yuki Huang Signed-off-by: Shun Kiyono Signed-off-by: Wei Du Signed-off-by: Charlie Truong Signed-off-by: Atsunori Fujita Co-authored-by: yuki <48991475+yuki-666@users.noreply.github.com> Co-authored-by: Parth Chadha Co-authored-by: Zhaocheng Zhu Co-authored-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Co-authored-by: Luis Vega Co-authored-by: Terry Kong Co-authored-by: Luis Vega <2478335+vegaluisjose@users.noreply.github.com> Co-authored-by: Yi-Fu Wu Co-authored-by: Anna Shors Co-authored-by: Dheeraj Peri Co-authored-by: Yuki Huang Co-authored-by: Shun Kiyono Co-authored-by: Wei Du Co-authored-by: Charlie Truong Co-authored-by: atfujita <40932835+AtsunoriFujita@users.noreply.github.com> Signed-off-by: Jialei Chen --- README.md | 2 +- docs/guides/eval.md | 25 ++- docs/guides/grpo.md | 2 +- docs/guides/sft-openmathinstruct2.md | 2 +- examples/configs/{ => evals}/eval.yaml | 5 +- examples/configs/evals/gpqa_eval.yaml | 15 ++ examples/configs/evals/local_eval.yaml | 14 ++ examples/configs/evals/math_eval.yaml | 9 + examples/prompts/gpqa.txt | 1 + examples/prompts/mmlu.txt | 1 + examples/run_eval.py | 53 +++--- examples/run_grpo_math.py | 71 +------- nemo_rl/data/eval_datasets/__init__.py | 88 +++++++++ nemo_rl/data/eval_datasets/aime2024.py | 44 +++++ nemo_rl/data/eval_datasets/gpqa.py | 63 +++++++ .../data/eval_datasets/local_math_dataset.py | 54 ++++++ nemo_rl/data/eval_datasets/math.py | 49 +++++ nemo_rl/data/eval_datasets/mmlu.py | 56 ++++++ nemo_rl/data/eval_datasets/mmlu_pro.py | 44 +++++ nemo_rl/data/processors.py | 168 ++++++++++++++++++ nemo_rl/environments/math_environment.py | 43 
++++- nemo_rl/evals/answer_parsing.py | 104 +++++++++++ tests/functional/test_converter_roundtrip.py | 14 -- tests/unit/data/eval_datasets/test_gpqa.py | 42 +++++ tests/unit/data/eval_datasets/test_math.py | 41 +++++ tests/unit/data/eval_datasets/test_mmlu.py | 43 +++++ tests/unit/data/test_data_processor.py | 2 +- .../environments/test_math_environment.py | 95 ++++++++++ 28 files changed, 1019 insertions(+), 131 deletions(-) rename examples/configs/{ => evals}/eval.yaml (92%) create mode 100644 examples/configs/evals/gpqa_eval.yaml create mode 100644 examples/configs/evals/local_eval.yaml create mode 100644 examples/configs/evals/math_eval.yaml create mode 100644 examples/prompts/gpqa.txt create mode 100644 examples/prompts/mmlu.txt create mode 100644 nemo_rl/data/eval_datasets/__init__.py create mode 100644 nemo_rl/data/eval_datasets/aime2024.py create mode 100644 nemo_rl/data/eval_datasets/gpqa.py create mode 100644 nemo_rl/data/eval_datasets/local_math_dataset.py create mode 100644 nemo_rl/data/eval_datasets/math.py create mode 100644 nemo_rl/data/eval_datasets/mmlu.py create mode 100644 nemo_rl/data/eval_datasets/mmlu_pro.py create mode 100644 nemo_rl/data/processors.py create mode 100644 nemo_rl/evals/answer_parsing.py create mode 100644 tests/unit/data/eval_datasets/test_gpqa.py create mode 100644 tests/unit/data/eval_datasets/test_math.py create mode 100644 tests/unit/data/eval_datasets/test_mmlu.py diff --git a/README.md b/README.md index cdf9404834..4dc2f7395f 100644 --- a/README.md +++ b/README.md @@ -377,7 +377,7 @@ uv run python examples/run_eval.py \ ``` > **Note:** Evaluation results may vary slightly due to various factors, such as sampling parameters, random seed, inference engine version, and inference engine settings. -Refer to `examples/configs/eval.yaml` for a full list of parameters that can be overridden. For an in-depth explanation of evaluation, refer to the [Evaluation documentation](docs/guides/eval.md). 
+Refer to `examples/configs/evals/eval.yaml` for a full list of parameters that can be overridden. For an in-depth explanation of evaluation, refer to the [Evaluation documentation](docs/guides/eval.md). ## Set Up Clusters diff --git a/docs/guides/eval.md b/docs/guides/eval.md index 0281bb21f7..b4f97b8c64 100644 --- a/docs/guides/eval.md +++ b/docs/guides/eval.md @@ -25,7 +25,7 @@ Once the conversion is complete, you can override the `generation.model_name` to ### Prepare the Evaluation Configuration **Override with Custom Settings** -To run the evaluation, you can use the [default configuration file](../../examples/configs/eval.yaml). Alternatively, you can specify a custom one or override some settings via the command line. +To run the evaluation, you can use the [default configuration file](../../examples/configs/evals/eval.yaml). Alternatively, you can specify a custom one or override some settings via the command line. The default configuration employs greedy sampling to evaluate Qwen2.5-Math-1.5B-Instruct on AIME-2024. @@ -42,7 +42,7 @@ We will use the `run_eval.py` script to run an evaluation using a model directly Note that the evaluation script only supports the Hugging Face format model. If you haven't converted your DCP format model, you should back to [Convert DCP to HF](#convert-dcp-to-hf-optional) and follow the guide to convert your model. 
```sh -# Run evaluation script with default config (examples/configs/eval.yaml) +# Run evaluation script with default config (examples/configs/evals/eval.yaml) uv run python examples/run_eval.py # Run evaluation script with converted model @@ -51,16 +51,22 @@ uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf # Run evaluation script with custom config file uv run python examples/run_eval.py --config path/to/custom_config.yaml +# Run evaluation script on one of the supported benchmarks (e.g., GPQA) +uv run python examples/run_eval.py --config examples/configs/evals/gpqa_eval.yaml + +# Run evaluation script with a local dataset that is prefetched as a csv file. +uv run python examples/run_eval.py --config examples/configs/evals/local_eval.yaml + # Override specific config values via command line # Example: Evaluation of DeepScaleR-1.5B-Preview on MATH-500 using 8 GPUs # Pass@1 accuracy averaged over 16 samples for each problem uv run python examples/run_eval.py \ + --config examples/configs/evals/math_eval.yaml \ generation.model_name=agentica-org/DeepScaleR-1.5B-Preview \ generation.temperature=0.6 \ generation.top_p=0.95 \ - generation.vllm_cfg.max_model_len=32768 \ - data.dataset_name=HuggingFaceH4/MATH-500 \ - data.dataset_key=test \ + generation.vllm_cfg.max_model_len=32768 \ + data.dataset_name="math500" \ eval.num_tests_per_prompt=16 \ cluster.gpus_per_node=8 ``` @@ -80,3 +86,12 @@ metric='pass@1' num_tests_per_prompt=1 score=0.1000 (3.0/30) ============================================================ ``` + +## List of currently supported benchmarks + +- [AIME-2024](../../nemo_rl/data/eval_datasets/aime2024.py) +- [GPQA and GPQA-diamond](../../nemo_rl/data/eval_datasets/gpqa.py) +- [MATH and MATH-500](../../nemo_rl/data/eval_datasets/math.py) +- [MMLU](../../nemo_rl/data/eval_datasets/mmlu.py) +- [MMLU-Pro](../../nemo_rl/data/eval_datasets/mmlu_pro.py) + diff --git a/docs/guides/grpo.md b/docs/guides/grpo.md index 
1f63df559d..f577820a21 100644 --- a/docs/guides/grpo.md +++ b/docs/guides/grpo.md @@ -67,7 +67,7 @@ def my_data_processor( ) -> DatumSpec: ``` -We have an example of this as `math_data_processor` in [run_grpo_math.py](../../examples/run_grpo_math.py) +We have an example of this as `math_data_processor` in [processors.py](../../nemo_rl/data/processors.py) #### Putting it all together diff --git a/docs/guides/sft-openmathinstruct2.md b/docs/guides/sft-openmathinstruct2.md index 6698c12bc0..1228d42a7d 100644 --- a/docs/guides/sft-openmathinstruct2.md +++ b/docs/guides/sft-openmathinstruct2.md @@ -38,7 +38,7 @@ To evaluate on the [MATH-500 benchmark](https://huggingface.co/datasets/HuggingF ``` uv run examples/run_eval.py \ - --config=examples/configs/eval.yaml \ + --config=examples/configs/evals/eval.yaml \ generation.model_name=results/sft_openmathinstruct2/step_1855/hf \ tokenizer.name=meta-llama/Llama-3.1-8B-Instruct \ data.dataset_name=HuggingFaceH4/MATH-500 \ diff --git a/examples/configs/eval.yaml b/examples/configs/evals/eval.yaml similarity index 92% rename from examples/configs/eval.yaml rename to examples/configs/evals/eval.yaml index e880d98bc7..eab0f1db21 100644 --- a/examples/configs/eval.yaml +++ b/examples/configs/evals/eval.yaml @@ -40,10 +40,7 @@ data: max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation prompt_file: null system_prompt_file: null - dataset_name: "HuggingFaceH4/aime_2024" - dataset_key: "train" - problem_key: "problem" - solution_key: "answer" + dataset_name: "aime2024" env: math: diff --git a/examples/configs/evals/gpqa_eval.yaml b/examples/configs/evals/gpqa_eval.yaml new file mode 100644 index 0000000000..463702d3a4 --- /dev/null +++ b/examples/configs/evals/gpqa_eval.yaml @@ -0,0 +1,15 @@ +# GPQA evaluation Configuration +defaults: "eval.yaml" + +generation: + model_name: "Qwen/Qwen2.5-7B-Instruct" + vllm_cfg: + max_model_len: 3072 + +data: + prompt_file: 
"examples/prompts/gpqa.txt" + dataset_name: "gpqa" + +env: + math: + verifier_type: "multichoice" diff --git a/examples/configs/evals/local_eval.yaml b/examples/configs/evals/local_eval.yaml new file mode 100644 index 0000000000..ad9def2112 --- /dev/null +++ b/examples/configs/evals/local_eval.yaml @@ -0,0 +1,14 @@ +# Evaluation Configuration from local files. +defaults: "eval.yaml" + +generation: + model_name: "Qwen/Qwen2.5-7B-Instruct" + +data: + prompt_file: "examples/prompts/cot.txt" + dataset_name: "local" + problem_key: "Question" + solution_key: "Answer" + split: "train" + data_paths: "https:\/\/openaipublic.blob.core.windows.net\/simple-evals\/math_500_test.csv" + file_format: "csv" diff --git a/examples/configs/evals/math_eval.yaml b/examples/configs/evals/math_eval.yaml new file mode 100644 index 0000000000..b42956866d --- /dev/null +++ b/examples/configs/evals/math_eval.yaml @@ -0,0 +1,9 @@ +# Math evaluation Configuration +defaults: "eval.yaml" + +generation: + model_name: "Qwen/Qwen2.5-7B-Instruct" + +data: + prompt_file: "examples/prompts/cot.txt" + dataset_name: "math" diff --git a/examples/prompts/gpqa.txt b/examples/prompts/gpqa.txt new file mode 100644 index 0000000000..04ea20d553 --- /dev/null +++ b/examples/prompts/gpqa.txt @@ -0,0 +1 @@ +Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. diff --git a/examples/prompts/mmlu.txt b/examples/prompts/mmlu.txt new file mode 100644 index 0000000000..04ea20d553 --- /dev/null +++ b/examples/prompts/mmlu.txt @@ -0,0 +1 @@ +Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering. 
diff --git a/examples/run_eval.py b/examples/run_eval.py index 6f7f60cc44..89e2ede395 100644 --- a/examples/run_eval.py +++ b/examples/run_eval.py @@ -19,16 +19,12 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from datasets import load_dataset from omegaconf import OmegaConf -from transformers import AutoTokenizer +from transformers import AutoTokenizer, PreTrainedTokenizerBase -from examples.run_grpo_math import math_data_processor from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.data import MathDataConfig from nemo_rl.data.datasets import AllTaskProcessedDataset -from nemo_rl.data.interfaces import TaskDataSpec -from nemo_rl.data.llm_message_utils import remap_dataset_keys +from nemo_rl.data.eval_datasets import load_eval_dataset from nemo_rl.distributed.ray_actor_environment_registry import ( get_actor_python_env, ) @@ -36,6 +32,9 @@ from nemo_rl.environments.math_environment import MathEnvironment from nemo_rl.evals.eval import MasterConfig, run_env_eval, setup from nemo_rl.models.generation import configure_generation_config +from nemo_rl.utils.config import load_config + +TokenizerType = PreTrainedTokenizerBase def parse_args(): @@ -54,28 +53,14 @@ def parse_args(): return args, overrides -def setup_data(tokenizer: AutoTokenizer, data_config: MathDataConfig, env_configs): - print("\n▶ Setting up data...") - math_task_spec = TaskDataSpec( - task_name="math", - prompt_file=data_config["prompt_file"], - system_prompt_file=data_config["system_prompt_file"], - ) +def setup_data(tokenizer: AutoTokenizer, data_config, env_configs): + print("Setting up data...") # load dataset - base_dataset = load_dataset(data_config["dataset_name"]) - if data_config["dataset_key"] is not None: - base_dataset = base_dataset[data_config["dataset_key"]] - # remap problem and solution keys - remapped_dataset = remap_dataset_keys( - base_dataset, - mapping_dict={ - data_config["problem_key"]: "problem", - data_config["solution_key"]: 
"expected_answer", - }, - ) + base_dataset = load_eval_dataset(data_config) + rekeyed_ds = base_dataset.rekeyed_ds - math_env = MathEnvironment.options( + env = MathEnvironment.options( runtime_env={ "py_executable": get_actor_python_env( "nemo_rl.environments.math_environment.MathEnvironment" @@ -84,14 +69,14 @@ def setup_data(tokenizer: AutoTokenizer, data_config: MathDataConfig, env_config ).remote(env_configs["math"]) dataset = AllTaskProcessedDataset( - dataset=remapped_dataset, + dataset=rekeyed_ds, tokenizer=tokenizer, - default_task_data_spec=math_task_spec, - task_data_processors=math_data_processor, + default_task_data_spec=base_dataset.task_spec, + task_data_processors=base_dataset.processor, max_seq_length=data_config["max_input_seq_length"], ) - return dataset, math_env, tokenizer + return dataset, env, tokenizer def main(): @@ -100,9 +85,11 @@ def main(): args, overrides = parse_args() if not args.config: - args.config = os.path.join(os.path.dirname(__file__), "configs", "eval.yaml") + args.config = os.path.join( + os.path.dirname(__file__), "configs", "evals", "eval.yaml" + ) - config = OmegaConf.load(args.config) + config = load_config(args.config) print(f"Loaded configuration from: {args.config}") if overrides: @@ -129,7 +116,7 @@ def main(): # Setup data ( dataset, - math_env, + env, tokenizer, ) = setup_data(tokenizer, config["data"], config["env"]) @@ -144,7 +131,7 @@ def main(): run_env_eval( vllm_generation, dataloader, - math_env, + env, master_config, ) diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index 4a64d3c13b..673322eb61 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -16,9 +16,8 @@ import os import pprint from collections import defaultdict -from typing import Any, Optional, cast +from typing import Any, Optional -import torch from omegaconf import OmegaConf from transformers import PreTrainedTokenizerBase @@ -116,74 +115,6 @@ def hf_data_processor( return output -# Example of a generic 
math data processor -# TaskDataProcessFnCallable -def math_data_processor( - datum_dict: dict[str, Any], - task_data_spec: TaskDataSpec, - tokenizer: TokenizerType, - max_seq_length: int, - idx: int, -) -> DatumSpec: - """Process a datum dictionary (directly loaded from dataset) into a DatumSpec for the Math Environment.""" - problem = datum_dict["problem"] - solution = str(datum_dict["expected_answer"]) - extra_env_info = {"ground_truth": solution} - - message_log: LLMMessageLogType = [] - - # system prompt - if task_data_spec.system_prompt: - sys_prompt: dict[str, str | torch.Tensor] = { - "role": "system", - "content": task_data_spec.system_prompt, - } - sys = tokenizer.apply_chat_template( - [cast(dict[str, str], sys_prompt)], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] - message_log.append(sys_prompt) - - # user prompt - if task_data_spec.prompt: - problem = task_data_spec.prompt.format(problem) - user_message = {"role": "user", "content": problem} - message = tokenizer.apply_chat_template( - [user_message], - tokenize=False, - add_generation_prompt=True, - add_special_tokens=False, - ) - user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] - user_message["content"] = message - message_log.append(user_message) - - length = sum(len(m["token_ids"]) for m in message_log) - - loss_multiplier = 1.0 - if length > max_seq_length: - # make smaller and mask out - for indiv_message in message_log: - indiv_message["token_ids"] = indiv_message["token_ids"][ - : min(4, max_seq_length // len(message_log)) - ] - loss_multiplier = 0.0 - - output: DatumSpec = { - "message_log": message_log, - "length": length, - "extra_env_info": extra_env_info, - "loss_multiplier": loss_multiplier, - "idx": idx, - } - if "task_name" in datum_dict: - output["task_name"] = datum_dict["task_name"] - return output - - def setup_data( tokenizer: 
TokenizerType, data_config: DataConfig, diff --git a/nemo_rl/data/eval_datasets/__init__.py b/nemo_rl/data/eval_datasets/__init__.py new file mode 100644 index 0000000000..2e5ba97974 --- /dev/null +++ b/nemo_rl/data/eval_datasets/__init__.py @@ -0,0 +1,88 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo_rl.data.eval_datasets.aime2024 import AIME2024Dataset +from nemo_rl.data.eval_datasets.gpqa import GPQADataset +from nemo_rl.data.eval_datasets.local_math_dataset import LocalMathDataset +from nemo_rl.data.eval_datasets.math import MathDataset +from nemo_rl.data.eval_datasets.mmlu import MMLUDataset +from nemo_rl.data.eval_datasets.mmlu_pro import MMLUProDataset + + +def load_eval_dataset(data_config): + """Loads evaluation dataset.""" + dataset_name = data_config["dataset_name"] + if dataset_name == "mmlu": + base_dataset = MMLUDataset( + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "aime2024": + base_dataset = AIME2024Dataset( + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "gpqa": + base_dataset = GPQADataset( + variant="main", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "gpqa_diamond": + base_dataset = GPQADataset( + variant="diamond", + 
prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "mmlu_pro": + base_dataset = MMLUProDataset( + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "math": + base_dataset = MathDataset( + variant="math_test", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "math500": + base_dataset = MathDataset( + variant="math_500_test", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "local": + base_dataset = LocalMathDataset( + name=dataset_name, + data_paths=data_config["data_paths"], + problem_key=data_config["problem_key"], + solution_key=data_config["solution_key"], + file_format=data_config["file_format"], + split=data_config["split"], + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + else: + raise ValueError(f"Unknown dataset {dataset_name}.") + return base_dataset + + +__all__ = [ + "AIME2024Dataset", + "GPQADataset", + "LocalMathDataset", + "MathDataset", + "MMLUDataset", + "MMLUProDataset", +] diff --git a/nemo_rl/data/eval_datasets/aime2024.py b/nemo_rl/data/eval_datasets/aime2024.py new file mode 100644 index 0000000000..9e585bb511 --- /dev/null +++ b/nemo_rl/data/eval_datasets/aime2024.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""AIME 2024 dataset.""" + +from typing import Any, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class AIME2024Dataset: + def __init__( + self, + prompt_file: Optional[str] = None, + system_prompt_file: Optional[str] = None, + ): + ds = load_dataset("HuggingFaceH4/aime_2024", split="train") + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + self.task_spec = TaskDataSpec( + task_name="aime2024", + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.math_data_processor + + def _rekey(self, data: dict[str, Any]): + return { + "problem": data["problem"], + "expected_answer": data["answer"], + } diff --git a/nemo_rl/data/eval_datasets/gpqa.py b/nemo_rl/data/eval_datasets/gpqa.py new file mode 100644 index 0000000000..f41efa136a --- /dev/null +++ b/nemo_rl/data/eval_datasets/gpqa.py @@ -0,0 +1,63 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""GPQA dataset and its variants.""" + +import random +from typing import Any, Literal, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class GPQADataset: + def __init__( + self, + variant: Literal["diamond", "main"] = "diamond", + prompt_file: Optional[str] = None, + system_prompt_file: Optional[str] = None, + ): + ds = load_dataset("Idavidrein/gpqa", f"gpqa_{variant}", split="train") + self._rng = random.Random() + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + self.task_spec = TaskDataSpec( + task_name=f"GPQA_{variant}", + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.multichoice_qa_processor + + def _rekey(self, data: dict[str, Any]): + choices = [ + data["Correct Answer"], + data["Incorrect Answer 1"], + data["Incorrect Answer 2"], + data["Incorrect Answer 3"], + ] + permutation = self._rng.sample(range(4), 4) + choices = [choices[i] for i in permutation] + correct_index = choices.index(data["Correct Answer"]) + correct_answer = "ABCD"[correct_index] + return { + "question": data["Question"], + "options": dict( + A=choices[0], + B=choices[1], + C=choices[2], + D=choices[3], + ), + "answer": correct_answer, + } diff --git a/nemo_rl/data/eval_datasets/local_math_dataset.py b/nemo_rl/data/eval_datasets/local_math_dataset.py new file mode 100644 index 0000000000..2810899b4a --- /dev/null +++ b/nemo_rl/data/eval_datasets/local_math_dataset.py @@ -0,0 +1,54 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Local math dataset.""" + +from typing import Any, Literal, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class LocalMathDataset: + def __init__( + self, + data_paths: str | list[str], + problem_key: str, + solution_key: str, + name: str, + split: Optional[str] = None, + file_format: Literal["csv", "json"] = "csv", + prompt_file: Optional[str] = None, + system_prompt_file: Optional[str] = None, + ): + ds = load_dataset(file_format, data_files=data_paths) + if split is not None: + ds = ds[split] + self._problem_key = problem_key + self._solution_key = solution_key + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + self.task_spec = TaskDataSpec( + task_name=name, + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.math_data_processor + + def _rekey(self, data: dict[str, Any]): + return { + "problem": data[self._problem_key], + "expected_answer": data[self._solution_key], + } diff --git a/nemo_rl/data/eval_datasets/math.py b/nemo_rl/data/eval_datasets/math.py new file mode 100644 index 0000000000..290902657e --- /dev/null +++ b/nemo_rl/data/eval_datasets/math.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Math dataset and its variants.""" + +from typing import Any, Literal, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class MathDataset: + def __init__( + self, + variant: Literal["math_test", "math_500_test"] = "math_test", + prompt_file: Optional[str] = None, + system_prompt_file: Optional[str] = None, + ): + ds = load_dataset( + "csv", + data_files=f"https://openaipublic.blob.core.windows.net/simple-evals/{variant}.csv", + split="train", + ) + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + self.task_spec = TaskDataSpec( + task_name=f"{variant}", + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.math_data_processor + + def _rekey(self, data: dict[str, Any]): + return { + "problem": data["Question"], + "expected_answer": data["Answer"], + } diff --git a/nemo_rl/data/eval_datasets/mmlu.py b/nemo_rl/data/eval_datasets/mmlu.py new file mode 100644 index 0000000000..f8b75d3b56 --- /dev/null +++ b/nemo_rl/data/eval_datasets/mmlu.py @@ -0,0 +1,56 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MMLU dataset and its variants.""" + +from typing import Any, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class MMLUDataset: + def __init__( + self, + prompt_file: Optional[str] = None, + system_prompt_file: Optional[str] = None, + ): + ds = load_dataset( + "csv", + data_files="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv", + split="train", + ) + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + + self.task_spec = TaskDataSpec( + task_name="MMLU", + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.multichoice_qa_processor + + def _rekey(self, data: dict[str, Any]): + return { + "question": data["Question"], + "options": dict( + A=data["A"], + B=data["B"], + C=data["C"], + D=data["D"], + ), + "answer": data["Answer"], + "subject": data["Subject"], + } diff --git a/nemo_rl/data/eval_datasets/mmlu_pro.py b/nemo_rl/data/eval_datasets/mmlu_pro.py new file mode 100644 index 0000000000..159d4d1738 --- /dev/null +++ b/nemo_rl/data/eval_datasets/mmlu_pro.py @@ -0,0 +1,44 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""MMLU-Pro dataset.""" + +from typing import Any, Optional + +from datasets import load_dataset + +from nemo_rl.data import processors +from nemo_rl.data.interfaces import TaskDataSpec + + +class MMLUProDataset: + def __init__(self, prompt_file: str, system_prompt_file: Optional[str] = None): + ds = load_dataset("TIGER-Lab/MMLU-Pro", split="test") + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) + + self.task_spec = TaskDataSpec( + task_name="MMLU-Pro", + prompt_file=prompt_file, + system_prompt_file=system_prompt_file, + ) + self.processor = processors.multichoice_qa_processor + + def _rekey(self, data: dict[str, Any]): + options = {chr(ord("A") + i): op for i, op in enumerate(data["options"])} + return { + "question": data["question"], + "options": options, + "answer": data["answer"], + "subject": data["category"], + } diff --git a/nemo_rl/data/processors.py b/nemo_rl/data/processors.py new file mode 100644 index 0000000000..67e3658882 --- /dev/null +++ b/nemo_rl/data/processors.py @@ -0,0 +1,168 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains data processors for evaluation.""" + +from typing import Any, cast + +import torch +from transformers import PreTrainedTokenizerBase + +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType, TaskDataSpec + +TokenizerType = PreTrainedTokenizerBase + + +# Example of a generic math data processor +def math_data_processor( + datum_dict: dict[str, Any], + task_data_spec: TaskDataSpec, + tokenizer: TokenizerType, + max_seq_length: int, + idx: int, +) -> DatumSpec: + """Process a datum dictionary (directly loaded from dataset) into a DatumSpec for the Math Environment.""" + problem = datum_dict["problem"] + solution = str(datum_dict["expected_answer"]) + extra_env_info = {"ground_truth": solution} + + message_log: LLMMessageLogType = [] + + # system prompt + if task_data_spec.system_prompt: + sys_prompt: dict[str, str | torch.Tensor] = { + "role": "system", + "content": task_data_spec.system_prompt, + } + sys = tokenizer.apply_chat_template( + [cast(dict[str, str], sys_prompt)], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + message_log.append(sys_prompt) + + # user prompt + if task_data_spec.prompt: + problem = task_data_spec.prompt.format(problem) + user_message = {"role": "user", "content": problem} + message = tokenizer.apply_chat_template( + [user_message], + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + ) + user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] + user_message["content"] = message + message_log.append(user_message) + + length = sum(len(m["token_ids"]) for m in message_log) + + loss_multiplier = 1.0 + if length > max_seq_length: + # make smaller and mask out + for indiv_message in message_log: + indiv_message["token_ids"] = indiv_message["token_ids"][ 
+ : min(4, max_seq_length // len(message_log)) + ] + loss_multiplier = 0.0 + + output: DatumSpec = { + "message_log": message_log, + "length": length, + "extra_env_info": extra_env_info, + "loss_multiplier": loss_multiplier, + "idx": idx, + } + if "task_name" in datum_dict: + output["task_name"] = datum_dict["task_name"] + return output + + +def _construct_multichoice_prompt( + prompt: str, question: str, options: dict[str, str] +) -> str: + """Construct prompt from question and options.""" + output = prompt + output += f"\n\nQuestion: {question}\nOptions:\n" + output += "\n".join( + [ + f"{letter}) {option}" + for letter, option in options.items() + if option is not None + ] + ) + return output + + +def multichoice_qa_processor( + datum_dict: dict[str, Any], + task_data_spec: TaskDataSpec, + tokenizer: TokenizerType, + max_seq_length: int, + idx: int, +) -> DatumSpec: + """Process a datum dictionary (directly loaded from dataset) into a DatumSpec for multiple-choice problems.""" + question = datum_dict["question"] + answer = str(datum_dict["answer"]) + options = datum_dict["options"] + extra_env_info = {"ground_truth": answer} + if "subject" in datum_dict: + extra_env_info.update({"subject": datum_dict["subject"]}) + + message_log = [] + + # system prompt + if task_data_spec.system_prompt: + sys_prompt: dict[str, str | torch.Tensor] = { + "role": "system", + "content": task_data_spec.system_prompt, + } + sys = tokenizer.apply_chat_template( + [cast(dict[str, str], sys_prompt)], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + message_log.append(sys_prompt) + + # user prompt + if task_data_spec.prompt: + question = _construct_multichoice_prompt( + task_data_spec.prompt, question, options + ) + user_message = {"role": "user", "content": question} + message = tokenizer.apply_chat_template( + [user_message], + tokenize=False, + add_generation_prompt=True, 
+ add_special_tokens=False, + ) + user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] + user_message["content"] = message + message_log.append(user_message) + + length = sum(len(m["token_ids"]) for m in message_log) + output: DatumSpec = { + "message_log": message_log, + "length": length, + "extra_env_info": extra_env_info, + "loss_multiplier": 1.0, + "idx": idx, + } + if "task_name" in datum_dict: + output["task_name"] = datum_dict["task_name"] + return output diff --git a/nemo_rl/environments/math_environment.py b/nemo_rl/environments/math_environment.py index e8a47db06f..8dd5247f1c 100644 --- a/nemo_rl/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -14,6 +14,7 @@ import contextlib import io import logging +import re from typing import Any, Optional, TypedDict import ray @@ -32,11 +33,13 @@ calculate_pass_rate_per_prompt, ) from nemo_rl.environments.utils import chunk_list_to_workers +from nemo_rl.evals import answer_parsing class MathEnvConfig(TypedDict): num_workers: int stop_strings: Optional[list[str]] # Default stop strings for this env + verifier_type: Optional[str] @contextlib.contextmanager @@ -97,6 +100,39 @@ def verify( return results +@ray.remote +class MultichoiceVerifyWorker: + def verify( + self, pred_responses: list[str], ground_truths: list[str] + ) -> list[float]: + """Verify the correctness of the predicted responses against the ground truth. + + Args: + pred_responses: list[str]. The predicted responses from the LLM. + ground_truths: list[str]. The ground truth responses. + + Returns: + list[float]. The rewards for each predicted response. 
+ """ + results = [] + for response, ground_truth in zip(pred_responses, ground_truths): + response = answer_parsing.normalize_response(response) + extracted_answer = None + for answer_regex in answer_parsing.MULTILINGUAL_ANSWER_REGEXES: + regex = answer_parsing.MULTILINGUAL_ANSWER_PATTERN_TEMPLATE.format( + answer_regex + ) + match = re.search(regex, response) + if match: + extracted_answer = answer_parsing.normalize_extracted_answer( + match.group(1) + ) + break + score = 1.0 if extracted_answer == ground_truth else 0.0 + results.append(score) + return results + + class MathEnvironmentMetadata(TypedDict): ground_truth: str @@ -106,8 +142,13 @@ class MathEnvironment(EnvironmentInterface): def __init__(self, cfg: MathEnvConfig): self.cfg = cfg self.num_workers = cfg["num_workers"] + worker_cls = ( + MultichoiceVerifyWorker + if cfg.get("verifier_type", "math") == "multichoice" + else HFVerifyWorker + ) self.workers = [ - HFVerifyWorker.options( # type: ignore # (decorated with @ray.remote) + worker_cls.options( # type: ignore # (decorated with @ray.remote) runtime_env={"py_executable": PY_EXECUTABLES.SYSTEM} ).remote() for _ in range(self.num_workers) diff --git a/nemo_rl/evals/answer_parsing.py b/nemo_rl/evals/answer_parsing.py new file mode 100644 index 0000000000..dcf020774a --- /dev/null +++ b/nemo_rl/evals/answer_parsing.py @@ -0,0 +1,104 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains utility functions for answer parsing.""" + +MULTILINGUAL_ANSWER_PATTERN_TEMPLATE = ( + "(?i){}[ \t]*([A-D]|[أ-د]|[অ]|[ব]|[ড]|[ঢ]|[A]|[B]|[C]|[D])" +) +# All the different ways "Answer" is written in different languages +MULTILINGUAL_ANSWER_REGEXES = [ + "Answer\s*:", + "Answer\s*:​​​​​​", # Korean invisible character + "উত্তর\s*:", + "उत्तर\s*:", + "উত্তরঃ", + "উত্তর\s*:", + "Antwort\s*:", + "답변\s*:", + "정답\s*:", + "답\s*:", + "答案\s*:", + "答案\s*:", + "答\s*:", + "答\s*:", + "答复\s*:", + "答曰\s*:", + "الإجابة:", + "الجواب:", + "إجابة:", + "الإجابة النهائية:", + "الإجابة الصحيحة:", + "الإجابة الصحيحة هي:", + "الإجابة هي:", + "الجواب النهائي:", + "Respuesta\s*:", + "Risposta\s*:", + "答え\s*:", + "答え\s*:", + "回答\s*:", + "回答\s*:", + "解答\s*:", + "Jawaban\s*:", + "Réponse\s*:", + "Resposta\s*:", + "Jibu\s*:", + "Idahun\s*:", + "Ìdáhùn\s*:", + "Idáhùn\s*:", + "Àmọ̀nà\s*:", + "Àdáhùn\s*:", + "Ànúgọ\s*:", + "Àṣàyàn\s*:", +] + + +def normalize_extracted_answer(extracted_answer: str) -> str: + return ( + # In arabic these are the letters used for A-D in multiple choice questions + extracted_answer.replace("أ", " A") + .replace("ب", " B") + .replace("ج", " C") + .replace("د", " D") + # In Bengali these are the letters used for A-D in multiple choice questions + .replace("অ", " A") + .replace("ব", " B") + .replace("ড", " C") + .replace("ঢ", " D") + # In Japanese these are the letters sometimes used for A-D in multiple choice questions + .replace("A", " A") + .replace("B", " B") + .replace("C", " C") + .replace("D", " D") + .strip() + ) + + +def normalize_response(response: str) -> str: + """Normalize the response by removing markdown and LaTeX formatting that may prevent a match.""" + return ( + response.replace("**", "") + .replace("$\\boxed{", "") + .replace("}$", "") + .replace("\\$", "") + .replace("$\\text{", "") + .replace("$", "") + .replace("\\mathrm{", "") + .replace("\\{", "") + .replace("\\text", "") + .replace("\\(", "") + .replace("\\mathbf{", "") + 
.replace("{", "") + .replace("\\boxed", "") + ) diff --git a/tests/functional/test_converter_roundtrip.py b/tests/functional/test_converter_roundtrip.py index ea865be9b2..9679fcc724 100644 --- a/tests/functional/test_converter_roundtrip.py +++ b/tests/functional/test_converter_roundtrip.py @@ -13,20 +13,6 @@ # limitations under the License. #!/usr/bin/env python3 -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - """ Functional test for converter roundtrip functionality. diff --git a/tests/unit/data/eval_datasets/test_gpqa.py b/tests/unit/data/eval_datasets/test_gpqa.py new file mode 100644 index 0000000000..3441f11974 --- /dev/null +++ b/tests/unit/data/eval_datasets/test_gpqa.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from transformers import AutoTokenizer + +from nemo_rl.data.eval_datasets.gpqa import GPQADataset + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_gpqa_dataset(): + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + gpqa_dataset = GPQADataset() + + # check that the dataset is formatted correctly + for example in gpqa_dataset.rekeyed_ds.take(5): + assert "question" in example + assert "options" in example + assert "answer" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["question"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" + ) diff --git a/tests/unit/data/eval_datasets/test_math.py b/tests/unit/data/eval_datasets/test_math.py new file mode 100644 index 0000000000..3bab184f1a --- /dev/null +++ b/tests/unit/data/eval_datasets/test_math.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from transformers import AutoTokenizer + +from nemo_rl.data.eval_datasets.math import MathDataset + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_math_dataset(): + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + math_dataset = MathDataset() + + # check that the dataset is formatted correctly + for example in math_dataset.rekeyed_ds.take(5): + assert "problem" in example + assert "expected_answer" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["problem"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['problem']}<|im_end|>\n" + ) diff --git a/tests/unit/data/eval_datasets/test_mmlu.py b/tests/unit/data/eval_datasets/test_mmlu.py new file mode 100644 index 0000000000..02c1936003 --- /dev/null +++ b/tests/unit/data/eval_datasets/test_mmlu.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +from transformers import AutoTokenizer + +from nemo_rl.data.eval_datasets.mmlu import MMLUDataset + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_mmlu_dataset(): + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + mmlu_dataset = MMLUDataset() + + # check that the dataset is formatted correctly + for example in mmlu_dataset.rekeyed_ds.take(5): + assert "question" in example + assert "options" in example + assert "answer" in example + assert "subject" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["question"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" + ) diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py index 302dfece77..dc88bebee3 100644 --- a/tests/unit/data/test_data_processor.py +++ b/tests/unit/data/test_data_processor.py @@ -20,10 +20,10 @@ abspath = os.path.abspath(__file__) sys.path.append("/".join(abspath.split("/")[:-4])) -from examples.run_grpo_math import math_data_processor from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.data.datasets import AllTaskProcessedDataset from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.data.processors import math_data_processor from nemo_rl.models.policy import TokenizerConfig basic_tokenizer_test_config: TokenizerConfig = { diff --git a/tests/unit/environments/test_math_environment.py b/tests/unit/environments/test_math_environment.py index 386a21e2f8..b254f2ef5f 100644 --- a/tests/unit/environments/test_math_environment.py +++ b/tests/unit/environments/test_math_environment.py @@ -42,6 +42,25 @@ def math_env(): time.sleep(0.1) 
+@pytest.fixture(scope="module") +def multichoice_env(): + """Create a MathEnvironment actor for testing.""" + env = MathEnvironment.options( + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.math_environment.MathEnvironment" + ), + "env_vars": dict(os.environ), + } + ).remote({"num_workers": 2, "verifier_type": "multichoice"}) + yield env + # Clean up the actor and wait for it to be killed + env.shutdown.remote() + ray.kill(env) + # Give some time for cleanup + time.sleep(0.1) + + @pytest.fixture def basic_test_data(): """Common test data for basic math problems.""" @@ -68,6 +87,41 @@ def basic_test_data(): } +@pytest.fixture +def basic_multichoice_test_data(): + """Common test data for basic multichoice problems.""" + return { + "message_log_batch": [ + [ + { + "role": "user", + "content": "Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD", + }, + {"role": "assistant", "content": "\nAnswer: C"}, + ], + [ + { + "role": "user", + "content": "Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD", + }, + {"role": "assistant", "content": "\nAnswer: B"}, + ], + [ + { + "role": "user", + "content": "Answer the following multiple choice question. 
The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD",
+            },
+            {"role": "assistant", "content": "\nAnswer: D"},
+        ],
+    ],
+    "metadata": [
+        {"ground_truth": "C"},
+        {"ground_truth": "B"},
+        {"ground_truth": "B"},
+    ],
+    }
+
+
 @pytest.fixture
 def mixed_test_data():
     """Test data with mix of correct and incorrect responses."""
@@ -148,6 +202,47 @@ def test_math_env_step_basic(math_env, basic_test_data):
     assert all(result.terminateds == 1.0), "All terminated flags should be 1.0"
 
 
+def test_multichoice_env_step_basic(multichoice_env, basic_multichoice_test_data):
+    """Test basic functionality of MathEnvironment step with multichoice verifier."""
+    result = ray.get(
+        multichoice_env.step.remote(
+            basic_multichoice_test_data["message_log_batch"],
+            basic_multichoice_test_data["metadata"],
+        )
+    )
+
+    # Check observations using field access
+    assert len(result.observations) == 3, (
+        "Should return observations for all 3 messages"
+    )
+    assert all(obs["role"] == "environment" for obs in result.observations), (
+        "All observations should be from environment"
+    )
+    assert all(
+        obs["content"] == "Environment: correct" for obs in result.observations[:2]
+    ), "The first two responses should be correct"
+    assert result.observations[2]["content"] == "Environment: incorrect", (
+        "The third response should be incorrect"
+    )
+
+    # Check metadata
+    assert len(result.metadata) == 3, "Should return metadata for all 3 messages"
+    assert result.metadata == basic_multichoice_test_data["metadata"], (
+        "Metadata should be unchanged"
+    )
+
+    # Check rewards and done flags
+    assert result.rewards.shape == (3,), "Rewards should be a tensor of shape (3,)"
+    assert all(result.rewards[:2] == 1.0), (
+        "The first two rewards should be 1.0 for correct answers"
+    )
+    assert result.rewards[2] == 0.0, "The third reward should be 0.0 for wrong answer"
+    assert result.terminateds.shape == (3,), (
+        "Terminated flags should 
be a tensor of shape (3,)" + ) + assert all(result.terminateds == 1.0), "All terminated flags should be 1.0" + + def test_math_env_step_mixed(math_env, mixed_test_data): """Test MathEnvironment step with a mix of correct and incorrect responses.""" result = ray.get( From 9325915bdd4c745e70b31dbf02bab6426e5bc5a0 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Thu, 3 Jul 2025 09:27:38 -0700 Subject: [PATCH 04/59] fix: enable expandable segments for hopper+ (#594) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- .../models/policy/dtensor_policy_worker.py | 10 ++----- .../models/policy/megatron_policy_worker.py | 9 ++----- nemo_rl/models/policy/utils.py | 26 +++++++++++++++++++ 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 46e1e8a52a..6872250d10 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -41,7 +41,6 @@ from nemo_rl.algorithms.interfaces import LossFunction, LossType from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches from nemo_rl.models.dtensor.parallelize import ( _parallelize_model, clip_grad_by_total_norm_, @@ -57,6 +56,7 @@ ) from nemo_rl.models.policy.utils import ( get_gpu_info, + get_runtime_env_for_policy_worker, import_class_from_path, sliding_window_overwrite, ) @@ -114,13 +114,7 @@ def get_cpu_state_dict( return new_state_dict -@ray.remote( - runtime_env={ - # TODO: This option causes a crash on Ampere. It's okay to enable on Hopper. 
- # "env_vars": {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"}, - **get_nsight_config_if_pattern_matches("dtensor_policy_worker"), - } -) +@ray.remote(runtime_env=get_runtime_env_for_policy_worker("dtensor_policy_worker")) class DTensorPolicyWorker: def __repr__(self) -> str: """Customizes the actor's prefix in the Ray logs. diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 89eb263674..9113723af0 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -89,7 +89,6 @@ from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.model_utils import from_parallel_logits_to_logprobs from nemo_rl.distributed.named_sharding import NamedSharding -from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches from nemo_rl.models.generation.interfaces import ( GenerationDatumSpec, GenerationOutputSpec, @@ -112,7 +111,7 @@ LogprobOutputSpec, ReferenceLogprobOutputSpec, ) -from nemo_rl.models.policy.utils import get_gpu_info +from nemo_rl.models.policy.utils import get_gpu_info, get_runtime_env_for_policy_worker TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) @@ -322,11 +321,7 @@ def destroy_parallel_state(): pass -@ray.remote( - runtime_env={ - **get_nsight_config_if_pattern_matches("megatron_policy_worker"), - } -) +@ray.remote(runtime_env=get_runtime_env_for_policy_worker("megatron_policy_worker")) class MegatronPolicyWorker: def __repr__(self): """Customizes the actor's prefix in the Ray logs. 
diff --git a/nemo_rl/models/policy/utils.py b/nemo_rl/models/policy/utils.py index f19914576d..485dea9011 100644 --- a/nemo_rl/models/policy/utils.py +++ b/nemo_rl/models/policy/utils.py @@ -19,6 +19,8 @@ import torch from transformers import AutoConfig +from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches + def import_class_from_path(name: str) -> Any: """Import a class from a string path (e.g. 'torch.optim.AdamW'). @@ -127,3 +129,27 @@ def sliding_window_overwrite(model_name: str) -> dict[str, Any]: ) return overwrite_dict + + +def get_runtime_env_for_policy_worker(policy_worker_name: str) -> dict[str, Any]: + """Get runtime environment configuration for DTensorPolicyWorker. + + Conditionally enables expandable_segments on Hopper GPUs only, + as it causes crashes on Ampere GPUs. + """ + runtime_env = { + **get_nsight_config_if_pattern_matches(policy_worker_name), + } + + # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) + try: + compute_capability = torch.cuda.get_device_properties(0).major + if compute_capability >= 9: # Hopper+ + runtime_env["env_vars"] = { + "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True" + } + except Exception: + # If we can't detect GPU capability, don't enable expandable_segments for safety + pass + + return runtime_env From ab84468b8afab3cf41bc9850d706c037225c5939 Mon Sep 17 00:00:00 2001 From: Jimmy Zhang <133159885+jiemingz@users.noreply.github.com> Date: Thu, 3 Jul 2025 15:15:23 -0400 Subject: [PATCH 05/59] feat: Enable vLLM cudagraphs (#498) Signed-off-by: Jimmy Zhang Signed-off-by: Jimmy Zhang <133159885+jiemingz@users.noreply.github.com> Co-authored-by: Parth Chadha Signed-off-by: Jialei Chen --- examples/configs/evals/eval.yaml | 1 + examples/configs/grpo-deepscaler-1.5b-8K.yaml | 1 + examples/configs/grpo_math_1B.yaml | 1 + examples/configs/grpo_math_8B.yaml | 1 + .../recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml | 1 + 
.../grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml | 1 + .../grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml | 1 + .../llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml | 1 + .../grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml | 1 + .../llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml | 1 + .../recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml | 1 + .../llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml | 1 + .../grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml | 1 + nemo_rl/models/generation/vllm.py | 6 ++++-- tests/unit/experience/test_rollouts.py | 1 + tests/unit/models/generation/test_vllm_generation.py | 1 + tests/unit/models/generation/test_vllm_large_model.py | 1 + 17 files changed, 20 insertions(+), 2 deletions(-) diff --git a/examples/configs/evals/eval.yaml b/examples/configs/evals/eval.yaml index eab0f1db21..85e193dcae 100644 --- a/examples/configs/evals/eval.yaml +++ b/examples/configs/evals/eval.yaml @@ -22,6 +22,7 @@ generation: pipeline_parallel_size: 1 gpu_memory_utilization: 0.9 max_model_len: 2048 + enforce_eager: False colocated: # true: generation shares training GPUs # false: uses dedicated generation resources diff --git a/examples/configs/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/grpo-deepscaler-1.5b-8K.yaml index 1013f3d4c2..ce5ed73c17 100644 --- a/examples/configs/grpo-deepscaler-1.5b-8K.yaml +++ b/examples/configs/grpo-deepscaler-1.5b-8K.yaml @@ -99,6 +99,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} + enforce_eager: False # For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit # For Gemma models, we need to use "auto" due to a vllm bug load_format: dummy diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index 1842b01497..fd944fa9e7 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -107,6 +107,7 @@ 
policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} + enforce_eager: False colocated: # true: generation shares training GPUs # false: uses dedicated generation resources diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index 429a1d7663..a857b08858 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -58,6 +58,7 @@ policy: tensor_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} + enforce_eager: False cluster: gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index 1248c28622..6bbcd95edd 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -89,6 +89,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml index 2458739e2e..af4bb6945d 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 16384 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index 8f6327e1e9..b854eb7d38 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ 
b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 4096 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index cd05c86dbb..9f92be089b 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml index c5ebb4f8eb..2a1a151ea5 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 16384 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml index 6d7a858749..06ae6b4637 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 16384 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml 
b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml index bd22cd760e..fe2de660ce 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml @@ -87,6 +87,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 4096 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml index d6176ddd22..00a40de4d0 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 4096 + enforce_eager: False colocated: enabled: true resources: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index d1303bb444..d3bbc266f2 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -90,6 +90,7 @@ policy: pipeline_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: 512 + enforce_eager: False colocated: enabled: true resources: diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 9506a063d3..64e97c3314 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -131,6 +131,9 @@ def configure_worker( seed = node_idx * 1024 + bundle_id init_kwargs["seed"] = seed + # Need to give each DP group its own vllm cache to address: + # https://github.com/vllm-project/vllm/issues/18851 + env_vars["VLLM_CACHE_ROOT"] = 
os.path.expanduser(f"~/.cache/vllm_{seed}") # Check if this worker is part of a parallel group (TP or TP+PP). # A worker is part of a parallel group if it's a secondary member (local_bundle_indices is None) @@ -334,8 +337,7 @@ def _patch_vllm_init_workers_ray(): enable_prefix_caching=torch.cuda.get_device_capability()[0] >= 8, dtype=self.cfg["vllm_cfg"]["precision"], seed=seed, - # Don't use cuda-graph by default as it leads to convergence issues (see https://github.com/NVIDIA-NeMo/RL/issues/186) - enforce_eager=True, + enforce_eager=self.cfg["vllm_cfg"]["enforce_eager"], max_model_len=self.cfg["vllm_cfg"]["max_model_len"], trust_remote_code=True, worker_extension_cls="nemo_rl.models.generation.vllm_backend.VllmInternalWorkerExtension", diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py index 08d1c0ffd6..db41fe2d39 100644 --- a/tests/unit/experience/test_rollouts.py +++ b/tests/unit/experience/test_rollouts.py @@ -241,6 +241,7 @@ def initial_multi_step_calculator_batch(rollout_tokenizer): "disable_log_stats": True, "disable_log_requests": True, "gpu_memory_utilization": 0.6, + "enforce_eager": "False", }, "colocated": { "enabled": True, diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 1404b02337..8371fababb 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -56,6 +56,7 @@ "async_engine": False, # Default to False for synchronous tests "skip_tokenizer_init": False, "load_format": "auto", + "enforce_eager": "False", }, "colocated": { "enabled": True, diff --git a/tests/unit/models/generation/test_vllm_large_model.py b/tests/unit/models/generation/test_vllm_large_model.py index 9735b5f03d..d24a0c0f31 100644 --- a/tests/unit/models/generation/test_vllm_large_model.py +++ b/tests/unit/models/generation/test_vllm_large_model.py @@ -50,6 +50,7 @@ "async_engine": True, 
"skip_tokenizer_init": False, "load_format": "auto", + "enforce_eager": "False", }, "colocated": { "enabled": True, From b14ae901464aa6cbb986cdd02ab01cdfab88b4ba Mon Sep 17 00:00:00 2001 From: abukharin-nv Date: Thu, 3 Jul 2025 17:09:27 -0400 Subject: [PATCH 06/59] docs: Update guide to include minimum compute requirement (#505) Signed-off-by: abukharin-nv Co-authored-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Co-authored-by: Parth Chadha Signed-off-by: Jialei Chen --- docs/guides/grpo-deepscaler.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/grpo-deepscaler.md b/docs/guides/grpo-deepscaler.md index 4404b42949..bc177f2fda 100644 --- a/docs/guides/grpo-deepscaler.md +++ b/docs/guides/grpo-deepscaler.md @@ -19,7 +19,7 @@ At the end of each stage, you need to specify the Hugging Face checkpoint to con uv run examples/converters/convert_dcp_to_hf.py --config=results/grpo-deepscaler-1.5b-8K/step_240/config.yaml --dcp-ckpt-path=results/grpo-deepscaler-1.5b-8K/step_240/policy/weights --hf-ckpt-path=results/grpo-deepscaler-1.5b-8K/step_240/hf ``` -When running the next command, we use the Hugging Face checkpoint as the initial checkpoint. We train with an 8K context window for 240 steps, a 16K context window for 290 steps, and a 24K context window for 50 steps. We run all experiments on a single 8XH100 80GB node or on a single 8XA100 80GB node. +When running the next command, we use the Hugging Face checkpoint as the initial checkpoint. We train with an 8K context window for 240 steps, a 16K context window for 290 steps, and a 24K context window for 50 steps. We run all experiments on a single 8XH100 80GB node. If you're running on 8XA100 80GB, you will need at least 1 node for 8K training and 2 nodes for 16-24k training. 
## Training Curve When using the above commands, we get the following training curve: From e694d38358450fe2dd6a53602a1771967f7a76c4 Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Fri, 4 Jul 2025 12:43:56 +0800 Subject: [PATCH 07/59] fix: skip HelpSteer3 unit test if downloading failed (#612) Signed-off-by: Yuki Huang Signed-off-by: Jialei Chen --- tests/unit/data/hf_datasets/test_helpsteer.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/unit/data/hf_datasets/test_helpsteer.py b/tests/unit/data/hf_datasets/test_helpsteer.py index 5573179b99..036ba75669 100644 --- a/tests/unit/data/hf_datasets/test_helpsteer.py +++ b/tests/unit/data/hf_datasets/test_helpsteer.py @@ -13,12 +13,24 @@ # limitations under the License. +import pytest + from nemo_rl.data.hf_datasets.helpsteer3 import ( HelpSteer3Dataset, format_helpsteer3, ) +@pytest.fixture(scope="module") +def helpsteer3_dataset(): + try: + dataset = HelpSteer3Dataset() + yield dataset + except Exception as e: + print(f"Error during loading HelpSteer3Dataset: {e}") + yield + + def test_format_helpsteer3(): """Test the format_helpsteer3 function with different preference values.""" # Test case 1: response1 is preferred (overall_preference < 0) @@ -60,19 +72,23 @@ def test_format_helpsteer3(): assert result3["rejected_response"] == "It's sunny today." 
-def test_helpsteer3_dataset_initialization(): +def test_helpsteer3_dataset_initialization(helpsteer3_dataset): """Test that HelpSteer3Dataset initializes correctly.""" - dataset = HelpSteer3Dataset() + dataset = helpsteer3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") # Verify dataset initialization assert dataset.task_spec.task_name == "HelpSteer3" -def test_helpsteer3_dataset_data_format(): +def test_helpsteer3_dataset_data_format(helpsteer3_dataset): """Test that HelpSteer3Dataset correctly formats the data.""" - dataset = HelpSteer3Dataset() + dataset = helpsteer3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") assert isinstance(dataset.formatted_ds, dict) assert "train" in dataset.formatted_ds From 4e9bd60e87ad50abe3f833b3c85e8fb2f122b929 Mon Sep 17 00:00:00 2001 From: Jonas Yang CN Date: Sat, 5 Jul 2025 13:57:22 +0800 Subject: [PATCH 08/59] feat: optimize get logprobs when cp enabled. (#528) Signed-off-by: Jonas yang Signed-off-by: Jialei Chen --- nemo_rl/algorithms/loss_functions.py | 9 ++-- nemo_rl/distributed/model_utils.py | 45 ++++++++++++++++--- nemo_rl/models/dtensor/parallelize.py | 22 ++++++--- .../models/policy/dtensor_policy_worker.py | 31 ++++++++----- .../models/policy/megatron_policy_worker.py | 2 +- 5 files changed, 83 insertions(+), 26 deletions(-) diff --git a/nemo_rl/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py index 1078da5fa3..1bf472d830 100644 --- a/nemo_rl/algorithms/loss_functions.py +++ b/nemo_rl/algorithms/loss_functions.py @@ -122,6 +122,7 @@ def __call__( prev_logprobs = data["prev_logprobs"][:, 1:] generation_logprobs = data["generation_logprobs"][:, 1:] reference_policy_logprobs = data["reference_policy_logprobs"][:, 1:] + seq_index = data.get("seq_index", None) mask = token_mask * sample_mask.unsqueeze(-1) @@ -146,12 +147,12 @@ def __call__( data["input_ids"], vocab_start_index=vocab_parallel_rank * next_token_logits.shape[-1], 
vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], - group=vocab_parallel_group, + tp_group=vocab_parallel_group, inference_only=False, ) elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): curr_logprobs = get_logprobs_from_vocab_parallel_logits( - next_token_logits, data["input_ids"] + next_token_logits, data["input_ids"], seq_index=seq_index ) else: next_token_logits_wo_last = next_token_logits[ @@ -332,7 +333,7 @@ def __call__( data["input_ids"], vocab_start_index=vocab_parallel_rank * next_token_logits.shape[-1], vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], - group=vocab_parallel_group, + tp_group=vocab_parallel_group, inference_only=False, ) elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): @@ -480,7 +481,7 @@ def _preference_loss( data["input_ids"], vocab_start_index=vocab_parallel_rank * next_token_logits.shape[-1], vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], - group=vocab_parallel_group, + tp_group=vocab_parallel_group, inference_only=False, ) elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): diff --git a/nemo_rl/distributed/model_utils.py b/nemo_rl/distributed/model_utils.py index f1e2e6ac81..31ac71cc23 100644 --- a/nemo_rl/distributed/model_utils.py +++ b/nemo_rl/distributed/model_utils.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any +from typing import Any, Optional import torch +from torch.distributed.tensor import DTensor, distribute_tensor @torch.no_grad() @@ -121,11 +122,12 @@ def backward( def from_parallel_logits_to_logprobs( vocab_parallel_logits: torch.Tensor, - target: torch.Tensor, + target: torch.Tensor | DTensor, vocab_start_index: int, vocab_end_index: int, - group: torch.distributed.ProcessGroup, + tp_group: torch.distributed.ProcessGroup, inference_only: bool = False, + seq_index: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Get log probabilities from TP sharded vocab logits. @@ -136,8 +138,10 @@ def from_parallel_logits_to_logprobs( NOTE: Must be the unmodified targets as this function will shift them internally. vocab_start_index (int): Starting vocabulary index for this worker's partition. vocab_end_index (int): Ending vocabulary index for this worker's partition. - group (torch.distributed.ProcessGroup): Process group for distributed communication. + tp_group (torch.distributed.ProcessGroup): Process group for distributed communication. inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. + seq_index (Optional[torch.Tensor]): Sequence index tensor with shape [seq_len]. + It is only provided for cp sharded logits. It represents how tensor is sharded across the sequence dimension. Returns: torch.Tensor: Log probabilities tensor with shape [batch_size, seq_len-1]. 
@@ -145,13 +149,42 @@ def from_parallel_logits_to_logprobs( Taken from: https://github.com/NVIDIA/NeMo-Aligner/blob/9faab404f21994a7eb1d6ed5890b76152b941636/nemo_aligner/utils/distributed.py#L354 """ - target = target.roll(shifts=-1, dims=-1) + cp_size = 1 + + if isinstance(target, DTensor) and "cp" in target.device_mesh.mesh_dim_names: + cp_dim_index = target.device_mesh.mesh_dim_names.index("cp") + cp_size = target.device_mesh.shape[cp_dim_index] + + if cp_size > 1: + assert seq_index is not None, "seq_index must be provided for cp sharded logits" + target_shape = torch.Size(target.shape) + cp_mesh = target.device_mesh + cp_placements = target.placements + _, sorted_indices = torch.sort(seq_index) + # Recover the original order of the target + target = target.full_tensor()[:, sorted_indices] + target = target.roll(shifts=-1, dims=-1)[:, seq_index] + + # Reshard + target = distribute_tensor(target, cp_mesh, cp_placements) + target = target.to_local() + else: + target = target.roll(shifts=-1, dims=-1) + probs: torch.Tensor = DistributedLogprob.apply( # type: ignore vocab_parallel_logits, target, vocab_start_index, vocab_end_index, - group, + tp_group, inference_only, ).contiguous() + + if cp_size > 1: + # probs is sharded on the sequence dimension. + # Get full sequence tensor, vocab dim has been reduced already. 
+ probs_dtensor = DTensor.from_local(probs, cp_mesh, cp_placements) + probs = probs_dtensor.full_tensor()[:, sorted_indices] + assert probs.shape == target_shape + return probs[:, :-1] diff --git a/nemo_rl/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py index fb9c720c20..370624a163 100644 --- a/nemo_rl/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -616,7 +616,9 @@ def get_grad_norm( def get_logprobs_from_vocab_parallel_logits( - vocab_parallel_logits: DTensor, input_ids: torch.Tensor + vocab_parallel_logits: DTensor, + input_ids: torch.Tensor | DTensor, + seq_index: Optional[torch.Tensor] = None, ): """Computes log probabilities from vocabulary-parallel logits. @@ -632,16 +634,26 @@ def get_logprobs_from_vocab_parallel_logits( Returns: torch.Tensor: Log probabilities for the given input IDs. """ - tp_mesh = vocab_parallel_logits.device_mesh - tp_rank: int = tp_mesh.get_local_rank() + device_mesh = vocab_parallel_logits.device_mesh + if seq_index is not None: + assert "cp" in device_mesh.mesh_dim_names, ( + "seq_index must be provided for cp sharded logits" + ) + + tp_size = 1 + + tp_group = device_mesh.get_group("tp") + tp_rank = tp_group.rank() + tp_size = tp_group.size() - vocab_interval_per_rank = vocab_parallel_logits.shape[-1] // tp_mesh.size() + vocab_interval_per_rank = vocab_parallel_logits.shape[-1] // tp_size return from_parallel_logits_to_logprobs( vocab_parallel_logits.to_local(), input_ids, vocab_interval_per_rank * tp_rank, (tp_rank + 1) * vocab_interval_per_rank, - tp_mesh.get_group(), + tp_group, inference_only=not torch.is_grad_enabled(), + seq_index=seq_index, ) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 6872250d10..b94e95fd72 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -574,7 +574,8 @@ def train( .full_tensor() .squeeze(0) ) - _, sorted_indices = 
torch.sort(seq_index_dtensor) + + mb["seq_index"] = seq_index_dtensor for tensor_name in mb: current_tensor = mb[tensor_name] @@ -587,18 +588,28 @@ def train( current_tensor, device_mesh=self.cp_mesh, placements=[Shard(sequence_dim)], - ).full_tensor()[:, sorted_indices] + ) break if isinstance(logits, DTensor): - logits = logits.full_tensor() - - logits_dtensor = DTensor.from_local( - logits, - device_mesh=self.cp_mesh, - placements=[Shard(sequence_dim)], - ) - logits = logits_dtensor.full_tensor()[:, sorted_indices] + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) loss, loss_metrics = loss_fn( logits, mb, global_valid_seqs, global_valid_toks diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 9113723af0..415da80a40 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -942,7 +942,7 @@ def collection_fn(output_tensor): target=input_ids, vocab_start_index=tp_rank * output_tensor.shape[-1], vocab_end_index=(tp_rank + 1) * output_tensor.shape[-1], - group=tp_grp, + tp_group=tp_grp, inference_only=True, ) From 6d4594908d46fce4fd3a0c1fa899230ed363b4a0 Mon Sep 17 00:00:00 2001 From: Jimmy Zhang <133159885+jiemingz@users.noreply.github.com> Date: Sat, 5 Jul 2025 05:13:48 -0400 Subject: [PATCH 09/59] enable mcore rope fusion (#608) Signed-off-by: Jimmy Zhang Signed-off-by: Jialei Chen --- examples/configs/dpo.yaml | 4 +++- examples/configs/grpo_math_1B_megatron.yaml | 4 
+++- .../recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml | 4 +++- .../dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml | 4 +++- .../recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml | 4 +++- examples/configs/sft.yaml | 2 ++ nemo_rl/models/policy/megatron_policy_worker.py | 1 + tests/unit/models/generation/test_vllm_generation.py | 1 + tests/unit/models/policy/test_megatron_worker.py | 1 + 9 files changed, 20 insertions(+), 5 deletions(-) diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index db6fb7fa6d..25f5b59ef2 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -98,7 +98,9 @@ policy: num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true - + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + optimizer: optimizer: "adam" lr: 5.0e-6 #4.0e-5 diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index 237fbb0df1..e9bcbf20b8 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -79,7 +79,9 @@ policy: context_parallel_size: 1 pipeline_dtype: ${policy.precision} sequence_parallel: false - + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + optimizer: optimizer: "adam" lr: 5.0e-6 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml index 1fd336d0b4..5b2b073691 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml @@ -55,7 +55,9 @@ policy: num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true - + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + optimizer: optimizer: "adam" lr: 5.0e-6 #4.0e-5 
diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 73008f3154..9388e8ed6f 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -55,7 +55,9 @@ policy: num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true - + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + optimizer: optimizer: "adam" lr: 5.0e-6 #4.0e-5 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml index ddd53920e6..68f0d177cd 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml @@ -43,7 +43,9 @@ policy: num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: false - + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + optimizer: optimizer: "adam" lr: 5.0e-6 diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index e3c614e2a7..4127411af6 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -73,6 +73,8 @@ policy: num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True optimizer: optimizer: "adam" diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 415da80a40..36bc8b7b76 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -466,6 +466,7 @@ def __init__( "a lambda and 
couldn't be serialized). This is based on this check " "https://github.com/NVIDIA/Megatron-LM/blob/1ab876ddc4c1893c76f26d775226a8d1dcdfb3d2/megatron/core/transformer/mlp.py#L174." ) + model_cfg.apply_rope_fusion = self.cfg["megatron_cfg"]["apply_rope_fusion"] checkpoint_config = CheckpointConfig( save_interval=100, diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 8371fababb..612630ded0 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -163,6 +163,7 @@ def get_basic_megatron_test_config( "context_parallel_size": 1, "pipeline_dtype": precision, "sequence_parallel": sequence_parallel, + "apply_rope_fusion": True, "optimizer": { "optimizer": "adam", "lr": 5.0e-6, diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index 03f41a8b1f..7b56977258 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -79,6 +79,7 @@ def create_megatron_test_config( "context_parallel_size": 1, "pipeline_dtype": precision, "sequence_parallel": sequence_parallel, + "apply_rope_fusion": True, "optimizer": { "optimizer": "adam", "lr": 5.0e-6, From d1bd4a51c9704dfefba8d356b7b16e08c72fe5ae Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Sun, 6 Jul 2025 12:17:42 +0800 Subject: [PATCH 10/59] fix: fix non-colocated with vllm tp>1 (#601) Signed-off-by: Yuki Huang Signed-off-by: Jialei Chen --- nemo_rl/distributed/worker_groups.py | 19 +++++++--- nemo_rl/models/generation/vllm.py | 13 ++++--- .../models/policy/dtensor_policy_worker.py | 2 +- .../models/generation/test_vllm_generation.py | 36 ++++++++++++------- 4 files changed, 47 insertions(+), 23 deletions(-) diff --git a/nemo_rl/distributed/worker_groups.py b/nemo_rl/distributed/worker_groups.py index c2e849cbee..b008452f1c 100644 --- 
a/nemo_rl/distributed/worker_groups.py +++ b/nemo_rl/distributed/worker_groups.py @@ -317,6 +317,7 @@ def __init__( name_prefix: str = "", bundle_indices_list: Optional[list[tuple[int, list[int]]]] = None, sharding_annotations: Optional[NamedSharding] = None, + env_vars: dict[str, str] = {}, ): """Initialize a group of distributed Ray workers. @@ -391,7 +392,7 @@ def __init__( # Create workers based on the bundle_indices_list self._create_workers_from_bundle_indices( - remote_worker_builder, bundle_indices_list + remote_worker_builder, bundle_indices_list, env_vars=env_vars ) def get_dp_leader_worker_idx(self, dp_shard_idx: int) -> int: @@ -407,6 +408,7 @@ def _create_workers_from_bundle_indices( self, remote_worker_builder: RayWorkerBuilder, bundle_indices_list: list[tuple[int, list[int]]], + env_vars: dict[str, str] = {}, ) -> None: """Create workers based on explicit bundle indices for tied worker groups. @@ -421,6 +423,10 @@ def _create_workers_from_bundle_indices( self.cluster.get_master_address_and_port() ) + # Update env_vars with the current environment variables + env_vars.update(dict(os.environ)) + + # Get the python environment for the actor actor_python_env = get_actor_python_env( remote_worker_builder.ray_actor_class_fqn ) @@ -459,8 +465,8 @@ def _create_workers_from_bundle_indices( for local_rank, bundle_idx in enumerate(local_bundle_indices): # Set up basic distributed environment variables - env_vars = dict(os.environ) - env_vars.update( + worker_env_vars = deepcopy(env_vars) + worker_env_vars.update( { "RANK": str(global_rank), "LOCAL_RANK": str(bundle_idx), @@ -470,7 +476,7 @@ def _create_workers_from_bundle_indices( "NODE_RANK": str(pg_idx), } ) - env_vars.pop("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", None) + worker_env_vars.pop("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", None) # Only the first worker in each group gets bundle_indices # This ensures only one worker per group is the model owner @@ -494,7 +500,10 @@ def 
_create_workers_from_bundle_indices( ) # Pass these options to the remote_worker_builder - runtime_env = {"env_vars": env_vars, "py_executable": py_executable} + runtime_env = { + "env_vars": worker_env_vars, + "py_executable": py_executable, + } runtime_env["env_vars"]["VIRTUAL_ENV"] = py_executable runtime_env["env_vars"]["UV_PROJECT_ENVIRONMENT"] = py_executable diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 64e97c3314..9d7c7873e9 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -319,10 +319,6 @@ def _patch_vllm_init_workers_ray(): os.environ["VLLM_USE_V1"] = os.environ.get("NRL_VLLM_USE_V1", "1") os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1" - if not self.cfg["colocated"]["enabled"]: - os.environ["NCCL_SHM_DISABLE"] = "1" - os.environ["NCCL_P2P_DISABLE"] = "1" - load_format = self.cfg["vllm_cfg"]["load_format"] if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(self.model_name): load_format = "auto" @@ -1225,6 +1221,13 @@ def __init__( "nemo_rl.models.generation.vllm.VllmGenerationWorker", config ) + # It's necessary to set env_vars here to ensure that vllm non-leader workers also have these env_vars + # Disable NCCL SHM if training and generation are not co-located: https://github.com/NVIDIA-NeMo/RL/issues/564 + env_vars = {} + if not self.cfg["colocated"]["enabled"]: + env_vars["NCCL_SHM_DISABLE"] = "1" + env_vars["NCCL_P2P_DISABLE"] = "1" + # Check if we need parallelism-aware worker group creation if self.model_parallel_size > 1: # For parallelism, create node-aware worker groups @@ -1236,6 +1239,7 @@ def __init__( name_prefix=name_prefix, bundle_indices_list=node_bundle_indices, sharding_annotations=self.sharding_annotations, + env_vars=env_vars, ) else: # Use standard worker group creation for non-parallel case @@ -1245,6 +1249,7 @@ def __init__( name_prefix=name_prefix, workers_per_node=workers_per_node, sharding_annotations=self.sharding_annotations, + env_vars=env_vars, 
) # Number of data parallel groups is the number of tied worker groups diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index b94e95fd72..ea6535de83 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -387,7 +387,7 @@ def init_collective(self, ip: str, port: int, world_size: int) -> None: from vllm.distributed.utils import StatelessProcessGroup # keep the same behavior as vllm - # see https://github.com/vllm-project/vllm/blob/v0.8.5/vllm/env_override.py#L25 + # see https://github.com/vllm-project/vllm/blob/v0.9.0/vllm/env_override.py#L25 if not os.path.exists("/dev/nvidia-caps-imex-channels"): os.environ["NCCL_CUMEM_ENABLE"] = "0" diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 612630ded0..8f37e1e23d 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -265,17 +265,15 @@ def policy_cluster_separate(): print(f"Error during policy_cluster_separate shutdown: {e}") -@pytest.fixture(scope="function") -def generation_cluster_separate(): - """Create a virtual cluster for the VllmGeneration policy, using 1 GPU.""" - cluster = _create_ray_virtual_cluster_for_test( - "vllm-test-generation-cluster-separate" +def get_generation_cluster_separate(num_gpus_per_node: int = 1) -> RayVirtualCluster: + """Create a virtual cluster for the VllmGeneration policy, using num_gpus_per_node GPU.""" + return RayVirtualCluster( + bundle_ct_per_node_list=[num_gpus_per_node], + use_gpus=True, + max_colocated_worker_groups=1, + num_gpus_per_node=num_gpus_per_node, + name="vllm-test-generation-cluster-separate", ) - yield cluster - try: - cluster.shutdown() - except Exception as e: - print(f"Error during generation_cluster_separate shutdown: {e}") @pytest.fixture(scope="function") @@ -1178,13 +1176,22 @@ def 
test_vllm_non_divisible_batch_handling(policy): @pytest.mark.asyncio @pytest.mark.parametrize("async_engine", [True, False]) +@pytest.mark.parametrize("tensor_parallel_size", [1, 2]) async def test_vllm_refit_non_collocated_update_weights( policy_cluster_separate, - generation_cluster_separate, tokenizer, test_input_data, async_engine, + tensor_parallel_size, ): + # Skip tensor_parallel_size == 2 until we have resources in CI + if tensor_parallel_size == 2: + pytest.skip( + "Test requires at least three GPUs to run with tensor_parallel_size == 2 on separate clusters." + ) + + generation_cluster_separate = get_generation_cluster_separate(tensor_parallel_size) + if ( policy_cluster_separate.num_gpus_per_node < 1 or generation_cluster_separate.num_gpus_per_node < 1 @@ -1195,7 +1202,6 @@ async def test_vllm_refit_non_collocated_update_weights( # Create Policy on its own cluster hf_config = get_basic_hf_test_config(enable_dtensor=True) - hf_config["dtensor_cfg"]["tensor_parallel_size"] = 1 hf_config["generation"]["colocated"]["enabled"] = False lm_policy = Policy(policy_cluster_separate, hf_config, tokenizer) @@ -1203,7 +1209,7 @@ async def test_vllm_refit_non_collocated_update_weights( vllm_config = deepcopy(basic_vllm_test_config) vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) vllm_config["vllm_cfg"]["async_engine"] = async_engine - vllm_config["vllm_cfg"]["tensor_parallel_size"] = 1 + vllm_config["vllm_cfg"]["tensor_parallel_size"] = tensor_parallel_size vllm_config["colocated"]["enabled"] = False vllm_generation = VllmGeneration(generation_cluster_separate, vllm_config) @@ -1235,6 +1241,10 @@ async def test_vllm_refit_non_collocated_update_weights( # Clean up vllm_generation.shutdown() lm_policy.shutdown() + try: + generation_cluster_separate.shutdown() + except Exception as e: + print(f"Error during generation_cluster_separate shutdown: {e}") @pytest.mark.timeout(210) From 9bc851c8be7cc261239d95ae5d62f302b784b043 Mon Sep 17 00:00:00 
2001 From: guyueh1 <140554423+guyueh1@users.noreply.github.com> Date: Mon, 7 Jul 2025 16:23:25 -0700 Subject: [PATCH 11/59] feat: Refit: reduce the number of IPC calls by packing weights (#589) Signed-off-by: Guyue Huang Co-authored-by: Parth Chadha Co-authored-by: yuki <48991475+yuki-666@users.noreply.github.com> Signed-off-by: Jialei Chen --- nemo_rl/models/generation/vllm_backend.py | 39 ++++++++-- .../models/policy/dtensor_policy_worker.py | 5 +- nemo_rl/models/policy/fsdp1_policy_worker.py | 5 +- .../models/policy/megatron_policy_worker.py | 74 ++++++++++++++++--- 4 files changed, 101 insertions(+), 22 deletions(-) diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index c40aea4418..8fe204e21f 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -59,17 +59,40 @@ def update_weights_from_ipc_handles(self, ipc_handles): # Get handles for this device device_uuid = self.report_device_id() handles = ipc_handles[device_uuid] + is_tensor_packed = handles[0] + if is_tensor_packed: + _, all_handles, tensor_metadata = handles + else: + _, name_and_handle_list = handles + device_id = self.device.index weights = [] - # Process each handle to get the tensor - for name, handle in handles: - func, args = handle - list_args = list(args) - # Update device ID to match the current device - list_args[6] = device_id - tensor = func(*list_args) - weights.append((name, tensor)) + if is_tensor_packed: + # Extract packed tensor from IPC handle + dtype_to_packed_tensor = {} + for dtype, tensor_handle in all_handles: + func, args = tensor_handle + list_args = list(args) + list_args[6] = device_id + tensor = func(*list_args) + dtype_to_packed_tensor[dtype] = tensor + + # Unpack tensor to weights. Here we only return a view of the tensor to avoid + # using extra memory. 
+ for key, (shape, dtype, offset, size) in tensor_metadata.items(): + tensor = dtype_to_packed_tensor[dtype][offset : offset + size].view( + *shape + ) + weights.append((key, tensor)) + else: + # Process each handle to get the tensor + for name, handle in name_and_handle_list: + func, args = handle + list_args = list(args) + list_args[6] = device_id + tensor = func(*list_args) + weights.append((name, tensor)) # Load weights into the model self.model_runner.model.load_weights(weights=weights) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index ea6535de83..379ce8ba52 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -954,7 +954,10 @@ def get_weights_ipc_handles(self, keys: Iterable[str]) -> dict[str, Any]: handle = reduce_tensor(p.detach()) all_handles.append((key, handle)) - return {device_uuid: all_handles} + # (pack_tensor_for_ipc: bool, handles: list) + serialized = (False, all_handles) + + return {device_uuid: serialized} @torch.no_grad() def prepare_info_for_collective(self) -> dict[str, Any]: diff --git a/nemo_rl/models/policy/fsdp1_policy_worker.py b/nemo_rl/models/policy/fsdp1_policy_worker.py index f4ec53daa0..418f280e46 100644 --- a/nemo_rl/models/policy/fsdp1_policy_worker.py +++ b/nemo_rl/models/policy/fsdp1_policy_worker.py @@ -901,7 +901,10 @@ def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: handle = reduce_tensor(p.detach()) all_handles.append((key, handle)) - return {device_uuid: all_handles} + # (pack_tensor_for_ipc: bool, handles: list) + serialized = (False, all_handles) + + return {device_uuid: serialized} def prepare_for_lp_inference(self) -> None: self.model = self.manual_load_to_gpu(self.model) diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 36bc8b7b76..368ce72dc2 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ 
b/nemo_rl/models/policy/megatron_policy_worker.py @@ -1339,18 +1339,68 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: from torch.multiprocessing.reductions import reduce_tensor # Create IPC handles for each parameter - all_handles = [] - for key, tensor in gathered_hf_params.items(): - handle = reduce_tensor(tensor.detach()) - all_handles.append((key, handle)) - - # Store references to avoid premature garbage collection - self._held_gather_buffer = gathered_hf_params - shapes = {} - for key, tensor in gathered_hf_params.items(): - shapes[key] = tensor.shape - - return {device_uuid: all_handles} + tensor_number_threshold = os.getenv( + "NEMO_RL_MEGATRON_IPC_TENSOR_PACKING_THRESHOLD", "32" + ) # an arbitrary threshold + if len(gathered_hf_params) >= int(tensor_number_threshold): + pack_tensor_for_ipc = True + else: + pack_tensor_for_ipc = False + + if pack_tensor_for_ipc: + # Pack tensors in gathered_hf_params into consolidated tensors by dtype + # First calculate total size needed for each dtype + type_to_total_size = defaultdict(lambda: 0) + tensor_metadata = dict() + + for key, tensor in gathered_hf_params.items(): + tensor_metadata[key] = ( + tensor.shape, # shape of the tensor + tensor.dtype, # dtype of the tensor + type_to_total_size[tensor.dtype], # offset of the tensor + # in packed buffer + tensor.numel(), # size of the tensor + ) + type_to_total_size[tensor.dtype] += tensor.numel() + + # Allocate consolidated tensors for each dtype + packed_tensors = { + dtype: torch.empty( + total_size, + device=next(iter(gathered_hf_params.values())).device, + dtype=dtype, + requires_grad=False, + ) + for dtype, total_size in type_to_total_size.items() + } + + # Copy tensors into consolidated buffers + for key, tensor in gathered_hf_params.items(): + metadata = tensor_metadata[key] + _, dtype, offset, size = metadata + packed_tensors[dtype][offset : offset + size].copy_( + tensor.detach().view(-1) + ) + + # Create IPC handles for consolidated 
tensors + all_handles = [ + (dtype, reduce_tensor(tensor.detach())) + for dtype, tensor in packed_tensors.items() + ] + + # Store reference to prevent garbage collection + self._held_gather_buffer = packed_tensors + + serialized = (pack_tensor_for_ipc, all_handles, tensor_metadata) + else: + all_handles = [] + for key, tensor in gathered_hf_params.items(): + handle = reduce_tensor(tensor.detach()) + all_handles.append((key, handle)) + self._held_gather_buffer = gathered_hf_params + serialized = (False, all_handles) + + return {device_uuid: serialized} def prepare_for_lp_inference(self): self.model = self.move_model(self.model, "cuda", move_grads=False) From 01cbc28ee28ba9d2c7f7c60976234cd3fd0a6af4 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 8 Jul 2025 09:27:09 -0700 Subject: [PATCH 12/59] feat: add flash-attn==2.7.4.post1 to backend dependencies (#622) Signed-off-by: Terry Kong Signed-off-by: Sahil Jain Co-authored-by: Sahil Jain Signed-off-by: Jialei Chen --- .github/workflows/cicd-main.yml | 49 +- 3rdparty/Megatron-LM-workspace/pyproject.toml | 1 + README.md | 10 +- docker/Dockerfile | 22 +- pyproject.toml | 25 +- tools/build-flash-attn-in-uv-cache.sh | 15 + uv.lock | 939 +++++++++--------- 7 files changed, 597 insertions(+), 464 deletions(-) create mode 100755 tools/build-flash-attn-in-uv-cache.sh diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 77d8a4b594..e3cf6cc42b 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -112,11 +112,24 @@ jobs: uses: actions/checkout@v4 with: submodules: 'recursive' + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.7.2" + enable-cache: true + prune-cache: false + # Faster than uv python install since it caches python alongside runner + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" - name: Check lint run: | - pip install pre-commit==3.6.0 - pre-commit install - pre-commit 
run --all-files --show-diff-on-failure --color=always + uv venv + uv run --group dev pre-commit install + uv run --group dev pre-commit run --all-files --show-diff-on-failure --color=always + - name: Minimize uv cache + run: uv cache prune --ci mypy-check: name: Mypy check @@ -125,10 +138,23 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.7.2" + enable-cache: true + prune-cache: false + # Faster than uv python install since it caches python alongside runner + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" - name: Check mypy run: | - pip install uv + uv venv uv run --group test mypy nemo_rl examples + - name: Minimize uv cache + run: uv cache prune --ci sphinx-build: if: ${{ needs.pre-flight.outputs.test_level != 'none' }} @@ -140,11 +166,24 @@ jobs: uses: actions/checkout@v4 with: submodules: 'recursive' + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.7.2" + enable-cache: true + prune-cache: false + # Faster than uv python install since it caches python alongside runner + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" - name: build docs run: | - pip install uv + uv venv cd docs/ uv run --group docs sphinx-build --fail-on-warning --builder html . 
_build/html + - name: Minimize uv cache + run: uv cache prune --ci build-container: if: ${{ needs.pre-flight.outputs.test_level != 'none' }} diff --git a/3rdparty/Megatron-LM-workspace/pyproject.toml b/3rdparty/Megatron-LM-workspace/pyproject.toml index 8a87b4389d..77f09f838a 100644 --- a/3rdparty/Megatron-LM-workspace/pyproject.toml +++ b/3rdparty/Megatron-LM-workspace/pyproject.toml @@ -5,6 +5,7 @@ requires = [ "setuptools", "pybind11", ] +build-backend = "setuptools.build_meta" [project] name = "megatron-core" diff --git a/README.md b/README.md index 4dc2f7395f..45900c78f0 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ - [📣 News](#-news) - [Features](#features) - [Prerequisites](#prerequisites) - - [Supported Training Backends](#training-backends) + - [Training Backends](#training-backends) - [GRPO](#grpo) - [GRPO Single Node](#grpo-single-node) - [GRPO Multi-node](#grpo-multi-node) @@ -105,6 +105,13 @@ pip install uv # This ensures that the version of python used is always what we prescribe. uv venv +# If working outside a container, it can help to build flash-attn and warm the +# uv cache before your first run. The NeMo RL Dockerfile will warm the uv cache +# with flash-attn. See https://docs.nvidia.com/nemo/rl/latest/docker.html for +# instructions if you are looking for the NeMo RL container. +bash tools/build-flash-attn-in-uv-cache.sh +# If sucessful, you should see "✅ flash-attn successfully added to uv cache" + # If you cannot install at the system level, you can install for your user with # pip install --user uv @@ -120,6 +127,7 @@ uv venv - Use the `uv run ` to execute scripts within the managed environment. This helps maintain consistency across different shells and sessions. - Ensure you have the necessary CUDA drivers and PyTorch installed compatible with your hardware. +- On the first install, `flash-attn` can take a while to install (~45min with 48 CPU hyperthreads). 
After it is built once, it is cached in your `uv`'s cache dir making subsequent installs much quicker. - **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models. ## Training Backends diff --git a/docker/Dockerfile b/docker/Dockerfile index 0d1c5367f8..828156d039 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -47,21 +47,28 @@ FROM base AS hermetic WORKDIR /opt/nemo-rl -# First copy only the dependency files -COPY pyproject.toml uv.lock ./ -COPY --link 3rdparty/ ./3rdparty/ - # Variables to control the build of TE. If there are issues with parallelization, consider # setting these to 1. -ARG MAX_JOBS=4 -ARG NVTE_BUILD_THREADS_PER_JOB=1 +ARG MAX_JOBS +ARG NVTE_BUILD_THREADS_PER_JOB ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv ENV UV_LINK_MODE=copy -# Create and activate virtual environment +# This step is to warm the uv cache with flash-attn without invalidating it due to COPY layers +# This layer has to be manually updated RUN <<"EOF" bash -exu uv venv ${UV_PROJECT_ENVIRONMENT} + +VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT uv pip install --link-mode symlink setuptools torch==2.7.0 psutil ninja --torch-backend=cu128 +VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT uv pip install --link-mode symlink flash-attn==2.7.4.post1 --no-build-isolation +EOF + +# First copy only the dependency files +COPY pyproject.toml uv.lock ./ +COPY --link 3rdparty/ ./3rdparty/ + +RUN <<"EOF" bash -exu # uv sync has a more reliable resolver than simple uv pip install which can fail # Sync each training + inference backend one at a time (since they may conflict) @@ -96,3 +103,4 @@ ENV NEMO_RL_VENV_DIR=/opt/ray_venvs # Copy in source and prefetch all virtual environments COPY . 
/opt/nemo-rl RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py + diff --git a/pyproject.toml b/pyproject.toml index 6b1371de83..a17cced3e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ requires-python = ">=3.12" license = {text = "Apache 2.0"} dependencies = [ "setuptools", + "ninja", # for flash-attn parallel build "torch==2.7.0", "triton", "colored==2.2.3", @@ -46,8 +47,17 @@ dependencies = [ ] [project.optional-dependencies] +# Currently unused, but after https://github.com/NVIDIA-NeMo/RL/issues/501 is resolved, we should use this for the "BASE" PYEXECUTABLE +automodel = [ + # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) + # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 + # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 + "flash-attn==2.7.4.post1", +] vllm = [ "vllm==0.9.0", + # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved + "flash-attn==2.7.4.post1", ] mcore = [ # also need cudnn (https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network) @@ -58,6 +68,10 @@ mcore = [ "transformer-engine[pytorch]==2.3.0", "megatron-core", "nemo-tron", + # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) + # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 + # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 + "flash-attn==2.7.4.post1", ] [dependency-groups] @@ -74,6 +88,8 @@ build = [ "hatchling", # Build requirement for mcore "pybind11", + # Build requirement for flash-attn + "psutil", ] docs = [ "sphinx", @@ -125,8 +141,7 
@@ url = "https://download.pytorch.org/whl/cu128" explicit = true [tool.uv] -# Currently, TE must be built with no build-isolation b/c it requires torch -no-build-isolation-package = ["transformer-engine-torch", "transformer-engine"] +no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn"] # Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies # and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd # avoid including these in the default dependency set, but for now it's required. @@ -137,6 +152,11 @@ default-groups = ["dev", "build"] # --link-mode=symlink (fastest option when uv cache and venv on different file-system; caveat: venv is brittle since it depends on the environment/container) link-mode = "copy" +# Needed when building from source +[[tool.uv.dependency-metadata]] +name = "flash-attn" +requires-dist = ["torch", "einops", "setuptools", "psutil", "ninja"] + [tool.black] line-length = 120 include = '\.pyi?$' @@ -182,3 +202,4 @@ convention = "google" "*_test.py" = ["D"] # Ignore F401 (import but unused) in __init__.py "__init__.py" = ["F401"] + diff --git a/tools/build-flash-attn-in-uv-cache.sh b/tools/build-flash-attn-in-uv-cache.sh new file mode 100755 index 0000000000..d1015ccb6c --- /dev/null +++ b/tools/build-flash-attn-in-uv-cache.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -eou pipefail + +if ! 
command -v uv &> /dev/null; then + echo "uv could not be found, please install it with 'pip install uv'" + exit 1 +fi + +# setuptools, torch, psutil (required by flash-attn), ninja (enables parallel flash-attn build) +uv sync +uv pip install ninja +uv sync --extra automodel +uv sync +echo "✅ flash-attn successfully added to uv cache" diff --git a/uv.lock b/uv.lock index e2a02bdd23..fdce004e44 100644 --- a/uv.lock +++ b/uv.lock @@ -2,18 +2,15 @@ version = 1 revision = 2 requires-python = ">=3.12" resolution-markers = [ - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine == 'arm64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'darwin'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'darwin'", - "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'darwin'", - "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'darwin'", - "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux'", - 
"python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", ] [manifest] @@ -23,6 +20,10 @@ members = [ "nemo-tron", ] +[[manifest.dependency-metadata]] +name = "flash-attn" +requires-dist = ["torch", "einops", "setuptools", "psutil", "ninja"] + [[package]] name = "absl-py" version = "2.3.0" @@ -34,7 +35,7 @@ wheels = [ [[package]] name = "accelerate" -version = "1.7.0" +version = "1.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -45,9 +46,9 @@ dependencies = [ { name = "safetensors" }, { name = "torch" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/33/47bbd507e3a851d33d19ce7b2141c5ea3689bfae91ba168044d7db24b0e9/accelerate-1.7.0.tar.gz", hash = "sha256:e8a2a5503d6237b9eee73cc8d36cf543f9c2d8dd2c6713450b322f5e6d53a610", size = 376026, upload-time = "2025-05-15T10:00:52.117Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/c2/b9e33ad13232606dded4c546e654fb06a15f1dbcbd95d81c9f9dd3ccc771/accelerate-1.8.1.tar.gz", hash = "sha256:f60df931671bc4e75077b852990469d4991ce8bd3a58e72375c3c95132034db9", size = 380872, upload-time = "2025-06-20T15:36:14.618Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/bb/be8146c196ad6e4dec78385d91e92591f8a433576c4e04c342a636fcd811/accelerate-1.7.0-py3-none-any.whl", hash = "sha256:cf57165cca28769c6cf2650812371c81b18e05743dfa3c748524b1bb4f2b272f", size = 362095, upload-time = "2025-05-15T10:00:49.914Z" }, + { url = "https://files.pythonhosted.org/packages/91/d9/e044c9d42d8ad9afa96533b46ecc9b7aea893d362b3c52bd78fb9fe4d7b3/accelerate-1.8.1-py3-none-any.whl", hash = 
"sha256:c47b8994498875a2b1286e945bd4d20e476956056c7941d512334f4eb44ff991", size = 365338, upload-time = "2025-06-20T15:36:12.71Z" }, ] [[package]] @@ -73,7 +74,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.12.12" +version = "3.12.13" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -84,42 +85,42 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f2/84/ea27e6ad14747d8c51afe201fb88a5c8282b6278256d30a6f71f730add88/aiohttp-3.12.12.tar.gz", hash = "sha256:05875595d2483d96cb61fa9f64e75262d7ac6251a7e3c811d8e26f7d721760bd", size = 7818643, upload-time = "2025-06-10T05:22:00.247Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/df/e6/df14ec151942818ecc5e685fa8a4b07d3d3d8a9e4a7d2701047c89290551/aiohttp-3.12.12-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:98451ce9ce229d092f278a74a7c2a06b3aa72984673c87796126d7ccade893e9", size = 700494, upload-time = "2025-06-10T05:19:46.18Z" }, - { url = "https://files.pythonhosted.org/packages/4f/dc/7bc6e17adcd7a82b0d0317ad3e792ac22c93fb672077f0eade93e8d70182/aiohttp-3.12.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:adbac7286d89245e1aff42e948503fdc6edf6d5d65c8e305a67c40f6a8fb95f4", size = 475095, upload-time = "2025-06-10T05:19:48.246Z" }, - { url = "https://files.pythonhosted.org/packages/80/fd/c4e8846ad9d9ecdb7d5ba96de65b7bf2c1582f0b2732f2023080c1c05255/aiohttp-3.12.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0728882115bfa85cbd8d0f664c8ccc0cfd5bd3789dd837596785450ae52fac31", size = 467929, upload-time = "2025-06-10T05:19:50.79Z" }, - { url = "https://files.pythonhosted.org/packages/70/40/abebcf5c81f5e65b4379c05929773be2731ce12414264d3e0fe09ee241eb/aiohttp-3.12.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf3b9d9e767f9d0e09fb1a31516410fc741a62cc08754578c40abc497d09540", size = 1714729, upload-time = 
"2025-06-10T05:19:52.989Z" }, - { url = "https://files.pythonhosted.org/packages/8e/67/4c4f96ef6f16405e7c5205ab3c28852c7e904493b6ddc1c744dda1c97a81/aiohttp-3.12.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c944860e86b9f77a462321a440ccf6fa10f5719bb9d026f6b0b11307b1c96c7b", size = 1697380, upload-time = "2025-06-10T05:19:55.832Z" }, - { url = "https://files.pythonhosted.org/packages/e9/a2/dae9ebea4caa8030170c0237e55fa0960df44b3596a849ab9ea621964054/aiohttp-3.12.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b1979e1f0c98c06fd0cd940988833b102fa3aa56751f6c40ffe85cabc51f6fd", size = 1752474, upload-time = "2025-06-10T05:19:58.007Z" }, - { url = "https://files.pythonhosted.org/packages/31/ef/f3d9073565ac7ad5257aaa1490ebfc2f182dfc817d3ccfd38c8ab35b2247/aiohttp-3.12.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:120b7dd084e96cfdad85acea2ce1e7708c70a26db913eabb8d7b417c728f5d84", size = 1798631, upload-time = "2025-06-10T05:20:00.393Z" }, - { url = "https://files.pythonhosted.org/packages/8b/0b/8b1978662274c80c8e4a739d9be1ae9ef25e5ce42b55838d6a9d1a4e3497/aiohttp-3.12.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e58f5ae79649ffa247081c2e8c85e31d29623cf2a3137dda985ae05c9478aae", size = 1718071, upload-time = "2025-06-10T05:20:02.812Z" }, - { url = "https://files.pythonhosted.org/packages/56/aa/35786137db867901b41cb3d2c19c0f4c56dfe581694dba99dec2683d8f8d/aiohttp-3.12.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aa5f049e3e2745b0141f13e5a64e7c48b1a1427ed18bbb7957b348f282fee56", size = 1633871, upload-time = "2025-06-10T05:20:05.127Z" }, - { url = "https://files.pythonhosted.org/packages/63/1d/34d45497dd04d08d662ecda875c44e91d271bbc5d21f4c9e4cbd3ddf7ae2/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:7163cc9cf3722d90f1822f8a38b211e3ae2fc651c63bb55449f03dc1b3ff1d44", size = 1694933, upload-time = "2025-06-10T05:20:07.431Z" }, - { url = "https://files.pythonhosted.org/packages/29/c7/41e09a4517449eabbb0a7fe6d60f584fe5b21d4bff761197eb0b81e70034/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ef97c4d035b721de6607f3980fa3e4ef0ec3aca76474b5789b7fac286a8c4e23", size = 1716386, upload-time = "2025-06-10T05:20:09.787Z" }, - { url = "https://files.pythonhosted.org/packages/3a/32/907bd2010b51b70de5314ad707dfc4e898ea0011ff3d678cdf43d6f8980a/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1c14448d6a86acadc3f7b2f4cc385d1fb390acb6f37dce27f86fe629410d92e3", size = 1657039, upload-time = "2025-06-10T05:20:12.198Z" }, - { url = "https://files.pythonhosted.org/packages/60/27/8d87344a33346dcd39273adc33060aeb135e0ef70d1d6e71a3b03894a8e9/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a1b6df6255cfc493454c79221183d64007dd5080bcda100db29b7ff181b8832c", size = 1736599, upload-time = "2025-06-10T05:20:14.519Z" }, - { url = "https://files.pythonhosted.org/packages/ca/45/57c7ef1af694a6d0906abab6edde03787c8c6b0cf5d8359b69d1eb0679df/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:60fc7338dfb0626c2927bfbac4785de3ea2e2bbe3d328ba5f3ece123edda4977", size = 1764575, upload-time = "2025-06-10T05:20:16.993Z" }, - { url = "https://files.pythonhosted.org/packages/2a/cc/b1f918cd702efa9ead9d41f89214e9225cda4e5d013d6eed7f1915c17d0a/aiohttp-3.12.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d2afc72207ef4c9d4ca9fcd00689a6a37ef2d625600c3d757b5c2b80c9d0cf9a", size = 1724184, upload-time = "2025-06-10T05:20:19.296Z" }, - { url = "https://files.pythonhosted.org/packages/47/55/089762ee32c2a2e0f523d9ab38c9da2a344cac0e0cc8d16ecf206517ef7e/aiohttp-3.12.12-cp312-cp312-win32.whl", hash = "sha256:8098a48f93b2cbcdb5778e7c9a0e0375363e40ad692348e6e65c3b70d593b27c", size = 421762, upload-time = 
"2025-06-10T05:20:22.063Z" }, - { url = "https://files.pythonhosted.org/packages/ab/47/151f657e429972916f61399bd52b410e9072d5a2cae1b794f890930e5797/aiohttp-3.12.12-cp312-cp312-win_amd64.whl", hash = "sha256:d1c1879b2e0fc337d7a1b63fe950553c2b9e93c071cf95928aeea1902d441403", size = 447863, upload-time = "2025-06-10T05:20:24.326Z" }, - { url = "https://files.pythonhosted.org/packages/ee/3e/396a7d1c47aa7a74612b186dc716857506c61afac72337a7a96215c2a124/aiohttp-3.12.12-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ea5d604318234427929d486954e3199aded65f41593ac57aa0241ab93dda3d15", size = 694901, upload-time = "2025-06-10T05:20:26.58Z" }, - { url = "https://files.pythonhosted.org/packages/cc/97/235e48eadf73a1854b4d4da29b88d00049309d897d55a511e1cbe4412603/aiohttp-3.12.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e03ff38250b8b572dce6fcd7b6fb6ee398bb8a59e6aa199009c5322d721df4fc", size = 472552, upload-time = "2025-06-10T05:20:28.957Z" }, - { url = "https://files.pythonhosted.org/packages/6b/73/cd7c9439e8cab4113650541017c6524bd0e675b219dfdbbf945a78305e3f/aiohttp-3.12.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:71125b1fc2b6a94bccc63bbece620906a4dead336d2051f8af9cbf04480bc5af", size = 464853, upload-time = "2025-06-10T05:20:31.652Z" }, - { url = "https://files.pythonhosted.org/packages/d1/33/eea88ee55ed4b3f74732d9fc773e6fcf134a2971a19c7ecc49a291e7e57f/aiohttp-3.12.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:784a66f9f853a22c6b8c2bd0ff157f9b879700f468d6d72cfa99167df08c5c9c", size = 1703671, upload-time = "2025-06-10T05:20:33.969Z" }, - { url = "https://files.pythonhosted.org/packages/2a/e3/a67ecf9c154b13bad9e2a86ea3782a4b73e889343ffde8c1aadcf9099c09/aiohttp-3.12.12-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a5be0b58670b54301404bd1840e4902570a1c3be00358e2700919cb1ea73c438", size = 1684934, upload-time = "2025-06-10T05:20:36.721Z" }, - { url = 
"https://files.pythonhosted.org/packages/89/f0/3aaea866531be2f2fcf3a87607e1f55fa72e6ce5acd6b058941a4fc35e15/aiohttp-3.12.12-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8f13566fc7bf5a728275b434bc3bdea87a7ed3ad5f734102b02ca59d9b510f", size = 1737004, upload-time = "2025-06-10T05:20:39.533Z" }, - { url = "https://files.pythonhosted.org/packages/a7/7a/15867a4c7d39d8fd9bd02191cf60b1d06415fc407bbd4ff2f9660845f1cb/aiohttp-3.12.12-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d736e57d1901683bc9be648aa308cb73e646252c74b4c639c35dcd401ed385ea", size = 1786378, upload-time = "2025-06-10T05:20:42.03Z" }, - { url = "https://files.pythonhosted.org/packages/bd/61/82b15f87088b35705e01fce55806241b45a1099b3470bbca0bed8ee98662/aiohttp-3.12.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2007eaa7aae9102f211c519d1ec196bd3cecb1944a095db19eeaf132b798738", size = 1708707, upload-time = "2025-06-10T05:20:44.474Z" }, - { url = "https://files.pythonhosted.org/packages/28/f2/aed0786d5a1c2ed1f5a13ff2a98baacc27206b81d93812da28fc49d8a5d0/aiohttp-3.12.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a813e61583cab6d5cdbaa34bc28863acdb92f9f46e11de1b3b9251a1e8238f6", size = 1622410, upload-time = "2025-06-10T05:20:46.957Z" }, - { url = "https://files.pythonhosted.org/packages/17/54/8305f49a960376136ada977be1370fddb584c63d40bd1b9bef59469f28c7/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e408293aa910b0aea48b86a28eace41d497a85ba16c20f619f0c604597ef996c", size = 1675435, upload-time = "2025-06-10T05:20:49.379Z" }, - { url = "https://files.pythonhosted.org/packages/bb/dc/0a55350025bc297265cfa6c6b1b1f7508f4226ca3238697cbe5e772a7d76/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:f3d31faf290f5a30acba46b388465b67c6dbe8655d183e9efe2f6a1d594e6d9d", size = 1707099, upload-time = "2025-06-10T05:20:51.974Z" }, 
- { url = "https://files.pythonhosted.org/packages/d8/70/d949a1612b996e49d540c10ed77a0a1465c482a590e9a59c1c7897746119/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0b84731697325b023902aa643bd1726d999f5bc7854bc28b17ff410a81151d4b", size = 1649693, upload-time = "2025-06-10T05:20:54.973Z" }, - { url = "https://files.pythonhosted.org/packages/c1/ea/fb87beb7135e25576a1e6fbe98106c037d9fcf1543f19108f9ceb73c192c/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a324c6852b6e327811748446e56cc9bb6eaa58710557922183175816e82a4234", size = 1725825, upload-time = "2025-06-10T05:20:57.433Z" }, - { url = "https://files.pythonhosted.org/packages/f1/1f/adbeb3e440d49b733cef499ace94723ab1fe9fb516425e219379e03b7c9a/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:22fd867fbd72612dcf670c90486dbcbaf702cb807fb0b42bc0b7a142a573574a", size = 1759300, upload-time = "2025-06-10T05:21:00.444Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c1/2fe007ad930f409d0d7fd9916cd55ec9b78b6a611a237424266ed71da48b/aiohttp-3.12.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3e092f1a970223794a4bf620a26c0e4e4e8e36bccae9b0b5da35e6d8ee598a03", size = 1708189, upload-time = "2025-06-10T05:21:02.969Z" }, - { url = "https://files.pythonhosted.org/packages/85/5e/ed3ed640fafae3972eae6cd26f66240108cf62452ac8128d59970d538cb1/aiohttp-3.12.12-cp313-cp313-win32.whl", hash = "sha256:7f5f5eb8717ef8ba15ab35fcde5a70ad28bbdc34157595d1cddd888a985f5aae", size = 420783, upload-time = "2025-06-10T05:21:06.287Z" }, - { url = "https://files.pythonhosted.org/packages/a6/db/57d2bb4af52dd0c6f62c42c7d34b82495b2902e50440134f70bfb7ee0fdd/aiohttp-3.12.12-cp313-cp313-win_amd64.whl", hash = "sha256:ace2499bdd03c329c054dc4b47361f2b19d5aa470f7db5c7e0e989336761b33c", size = 446721, upload-time = "2025-06-10T05:21:08.738Z" }, +sdist = { url = 
"https://files.pythonhosted.org/packages/42/6e/ab88e7cb2a4058bed2f7870276454f85a7c56cd6da79349eb314fc7bbcaa/aiohttp-3.12.13.tar.gz", hash = "sha256:47e2da578528264a12e4e3dd8dd72a7289e5f812758fe086473fab037a10fcce", size = 7819160, upload-time = "2025-06-14T15:15:41.354Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/6a/ce40e329788013cd190b1d62bbabb2b6a9673ecb6d836298635b939562ef/aiohttp-3.12.13-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0aa580cf80558557285b49452151b9c69f2fa3ad94c5c9e76e684719a8791b73", size = 700491, upload-time = "2025-06-14T15:14:00.048Z" }, + { url = "https://files.pythonhosted.org/packages/28/d9/7150d5cf9163e05081f1c5c64a0cdf3c32d2f56e2ac95db2a28fe90eca69/aiohttp-3.12.13-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b103a7e414b57e6939cc4dece8e282cfb22043efd0c7298044f6594cf83ab347", size = 475104, upload-time = "2025-06-14T15:14:01.691Z" }, + { url = "https://files.pythonhosted.org/packages/f8/91/d42ba4aed039ce6e449b3e2db694328756c152a79804e64e3da5bc19dffc/aiohttp-3.12.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f64e748e9e741d2eccff9597d09fb3cd962210e5b5716047cbb646dc8fe06f", size = 467948, upload-time = "2025-06-14T15:14:03.561Z" }, + { url = "https://files.pythonhosted.org/packages/99/3b/06f0a632775946981d7c4e5a865cddb6e8dfdbaed2f56f9ade7bb4a1039b/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c955989bf4c696d2ededc6b0ccb85a73623ae6e112439398935362bacfaaf6", size = 1714742, upload-time = "2025-06-14T15:14:05.558Z" }, + { url = "https://files.pythonhosted.org/packages/92/a6/2552eebad9ec5e3581a89256276009e6a974dc0793632796af144df8b740/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d640191016763fab76072c87d8854a19e8e65d7a6fcfcbf017926bdbbb30a7e5", size = 1697393, upload-time = "2025-06-14T15:14:07.194Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/9f/bd08fdde114b3fec7a021381b537b21920cdd2aa29ad48c5dffd8ee314f1/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dc507481266b410dede95dd9f26c8d6f5a14315372cc48a6e43eac652237d9b", size = 1752486, upload-time = "2025-06-14T15:14:08.808Z" }, + { url = "https://files.pythonhosted.org/packages/f7/e1/affdea8723aec5bd0959171b5490dccd9a91fcc505c8c26c9f1dca73474d/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8a94daa873465d518db073bd95d75f14302e0208a08e8c942b2f3f1c07288a75", size = 1798643, upload-time = "2025-06-14T15:14:10.767Z" }, + { url = "https://files.pythonhosted.org/packages/f3/9d/666d856cc3af3a62ae86393baa3074cc1d591a47d89dc3bf16f6eb2c8d32/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f52420cde4ce0bb9425a375d95577fe082cb5721ecb61da3049b55189e4e6", size = 1718082, upload-time = "2025-06-14T15:14:12.38Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ce/3c185293843d17be063dada45efd2712bb6bf6370b37104b4eda908ffdbd/aiohttp-3.12.13-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f7df1f620ec40f1a7fbcb99ea17d7326ea6996715e78f71a1c9a021e31b96b8", size = 1633884, upload-time = "2025-06-14T15:14:14.415Z" }, + { url = "https://files.pythonhosted.org/packages/3a/5b/f3413f4b238113be35dfd6794e65029250d4b93caa0974ca572217745bdb/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3062d4ad53b36e17796dce1c0d6da0ad27a015c321e663657ba1cc7659cfc710", size = 1694943, upload-time = "2025-06-14T15:14:16.48Z" }, + { url = "https://files.pythonhosted.org/packages/82/c8/0e56e8bf12081faca85d14a6929ad5c1263c146149cd66caa7bc12255b6d/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:8605e22d2a86b8e51ffb5253d9045ea73683d92d47c0b1438e11a359bdb94462", size = 1716398, upload-time = "2025-06-14T15:14:18.589Z" }, + 
{ url = "https://files.pythonhosted.org/packages/ea/f3/33192b4761f7f9b2f7f4281365d925d663629cfaea093a64b658b94fc8e1/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:54fbbe6beafc2820de71ece2198458a711e224e116efefa01b7969f3e2b3ddae", size = 1657051, upload-time = "2025-06-14T15:14:20.223Z" }, + { url = "https://files.pythonhosted.org/packages/5e/0b/26ddd91ca8f84c48452431cb4c5dd9523b13bc0c9766bda468e072ac9e29/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:050bd277dfc3768b606fd4eae79dd58ceda67d8b0b3c565656a89ae34525d15e", size = 1736611, upload-time = "2025-06-14T15:14:21.988Z" }, + { url = "https://files.pythonhosted.org/packages/c3/8d/e04569aae853302648e2c138a680a6a2f02e374c5b6711732b29f1e129cc/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2637a60910b58f50f22379b6797466c3aa6ae28a6ab6404e09175ce4955b4e6a", size = 1764586, upload-time = "2025-06-14T15:14:23.979Z" }, + { url = "https://files.pythonhosted.org/packages/ac/98/c193c1d1198571d988454e4ed75adc21c55af247a9fda08236602921c8c8/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e986067357550d1aaa21cfe9897fa19e680110551518a5a7cf44e6c5638cb8b5", size = 1724197, upload-time = "2025-06-14T15:14:25.692Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9e/07bb8aa11eec762c6b1ff61575eeeb2657df11ab3d3abfa528d95f3e9337/aiohttp-3.12.13-cp312-cp312-win32.whl", hash = "sha256:ac941a80aeea2aaae2875c9500861a3ba356f9ff17b9cb2dbfb5cbf91baaf5bf", size = 421771, upload-time = "2025-06-14T15:14:27.364Z" }, + { url = "https://files.pythonhosted.org/packages/52/66/3ce877e56ec0813069cdc9607cd979575859c597b6fb9b4182c6d5f31886/aiohttp-3.12.13-cp312-cp312-win_amd64.whl", hash = "sha256:671f41e6146a749b6c81cb7fd07f5a8356d46febdaaaf07b0e774ff04830461e", size = 447869, upload-time = "2025-06-14T15:14:29.05Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/0f/db19abdf2d86aa1deec3c1e0e5ea46a587b97c07a16516b6438428b3a3f8/aiohttp-3.12.13-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d4a18e61f271127465bdb0e8ff36e8f02ac4a32a80d8927aa52371e93cd87938", size = 694910, upload-time = "2025-06-14T15:14:30.604Z" }, + { url = "https://files.pythonhosted.org/packages/d5/81/0ab551e1b5d7f1339e2d6eb482456ccbe9025605b28eed2b1c0203aaaade/aiohttp-3.12.13-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:532542cb48691179455fab429cdb0d558b5e5290b033b87478f2aa6af5d20ace", size = 472566, upload-time = "2025-06-14T15:14:32.275Z" }, + { url = "https://files.pythonhosted.org/packages/34/3f/6b7d336663337672d29b1f82d1f252ec1a040fe2d548f709d3f90fa2218a/aiohttp-3.12.13-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d7eea18b52f23c050ae9db5d01f3d264ab08f09e7356d6f68e3f3ac2de9dfabb", size = 464856, upload-time = "2025-06-14T15:14:34.132Z" }, + { url = "https://files.pythonhosted.org/packages/26/7f/32ca0f170496aa2ab9b812630fac0c2372c531b797e1deb3deb4cea904bd/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad7c8e5c25f2a26842a7c239de3f7b6bfb92304593ef997c04ac49fb703ff4d7", size = 1703683, upload-time = "2025-06-14T15:14:36.034Z" }, + { url = "https://files.pythonhosted.org/packages/ec/53/d5513624b33a811c0abea8461e30a732294112318276ce3dbf047dbd9d8b/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6af355b483e3fe9d7336d84539fef460120c2f6e50e06c658fe2907c69262d6b", size = 1684946, upload-time = "2025-06-14T15:14:38Z" }, + { url = "https://files.pythonhosted.org/packages/37/72/4c237dd127827b0247dc138d3ebd49c2ded6114c6991bbe969058575f25f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a95cf9f097498f35c88e3609f55bb47b28a5ef67f6888f4390b3d73e2bac6177", size = 1737017, upload-time = "2025-06-14T15:14:39.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/0d/67/8a7eb3afa01e9d0acc26e1ef847c1a9111f8b42b82955fcd9faeb84edeb4/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8ed8c38a1c584fe99a475a8f60eefc0b682ea413a84c6ce769bb19a7ff1c5ef", size = 1786390, upload-time = "2025-06-14T15:14:42.151Z" }, + { url = "https://files.pythonhosted.org/packages/48/19/0377df97dd0176ad23cd8cad4fd4232cfeadcec6c1b7f036315305c98e3f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0b9170d5d800126b5bc89d3053a2363406d6e327afb6afaeda2d19ee8bb103", size = 1708719, upload-time = "2025-06-14T15:14:44.039Z" }, + { url = "https://files.pythonhosted.org/packages/61/97/ade1982a5c642b45f3622255173e40c3eed289c169f89d00eeac29a89906/aiohttp-3.12.13-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:372feeace612ef8eb41f05ae014a92121a512bd5067db8f25101dd88a8db11da", size = 1622424, upload-time = "2025-06-14T15:14:45.945Z" }, + { url = "https://files.pythonhosted.org/packages/99/ab/00ad3eea004e1d07ccc406e44cfe2b8da5acb72f8c66aeeb11a096798868/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a946d3702f7965d81f7af7ea8fb03bb33fe53d311df48a46eeca17e9e0beed2d", size = 1675447, upload-time = "2025-06-14T15:14:47.911Z" }, + { url = "https://files.pythonhosted.org/packages/3f/fe/74e5ce8b2ccaba445fe0087abc201bfd7259431d92ae608f684fcac5d143/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a0c4725fae86555bbb1d4082129e21de7264f4ab14baf735278c974785cd2041", size = 1707110, upload-time = "2025-06-14T15:14:50.334Z" }, + { url = "https://files.pythonhosted.org/packages/ef/c4/39af17807f694f7a267bd8ab1fbacf16ad66740862192a6c8abac2bff813/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9b28ea2f708234f0a5c44eb6c7d9eb63a148ce3252ba0140d050b091b6e842d1", size = 1649706, upload-time = "2025-06-14T15:14:52.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/e8/f5a0a5f44f19f171d8477059aa5f28a158d7d57fe1a46c553e231f698435/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d4f5becd2a5791829f79608c6f3dc745388162376f310eb9c142c985f9441cc1", size = 1725839, upload-time = "2025-06-14T15:14:54.617Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ac/81acc594c7f529ef4419d3866913f628cd4fa9cab17f7bf410a5c3c04c53/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:60f2ce6b944e97649051d5f5cc0f439360690b73909230e107fd45a359d3e911", size = 1759311, upload-time = "2025-06-14T15:14:56.597Z" }, + { url = "https://files.pythonhosted.org/packages/38/0d/aabe636bd25c6ab7b18825e5a97d40024da75152bec39aa6ac8b7a677630/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:69fc1909857401b67bf599c793f2183fbc4804717388b0b888f27f9929aa41f3", size = 1708202, upload-time = "2025-06-14T15:14:58.598Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ab/561ef2d8a223261683fb95a6283ad0d36cb66c87503f3a7dde7afe208bb2/aiohttp-3.12.13-cp313-cp313-win32.whl", hash = "sha256:7d7e68787a2046b0e44ba5587aa723ce05d711e3a3665b6b7545328ac8e3c0dd", size = 420794, upload-time = "2025-06-14T15:15:00.939Z" }, + { url = "https://files.pythonhosted.org/packages/9d/47/b11d0089875a23bff0abd3edb5516bcd454db3fefab8604f5e4b07bd6210/aiohttp-3.12.13-cp313-cp313-win_amd64.whl", hash = "sha256:5a178390ca90419bfd41419a809688c368e63c86bd725e1186dd97f6b89c2706", size = 446735, upload-time = "2025-06-14T15:15:02.858Z" }, ] [[package]] @@ -148,11 +149,11 @@ wheels = [ [[package]] name = "airportsdata" -version = "20250523" +version = "20250622" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4f/0d/72a1dcdf3c70ece43a4d8ce1337b1b8683f8c47ca9f7b1ba8355baa550e0/airportsdata-20250523.tar.gz", hash = "sha256:78e0eb72efccd63bda2decf1c6ec0a8e1d3ae8312764a85baa56496607c8f3de", size = 903156, upload-time = 
"2025-05-23T09:56:50.107Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/98/5f055f0d3fce23411948c30b6be359bb28821c4919069b2bea3f2af78d70/airportsdata-20250622.tar.gz", hash = "sha256:7adaa4cffdc6e8122d16a63e958ab1eb0b2e57e8c1bf0d10b8218f64067550e6", size = 903216, upload-time = "2025-06-22T06:55:49.743Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/72/b78feb161ae6c82c6fb45d0eb6cb1881bc04c05207646e1ecd936a0d6c35/airportsdata-20250523-py3-none-any.whl", hash = "sha256:88ce8a928ee45d650b5214a3b16273f0bf1d04a4494c78a216aea067e42a0233", size = 912679, upload-time = "2025-05-23T09:56:48.273Z" }, + { url = "https://files.pythonhosted.org/packages/10/24/4daf2c931855ffd9c3fe8cbea133ca0689afaed9d8c0b04597f8e074d79e/airportsdata-20250622-py3-none-any.whl", hash = "sha256:80954c0109bb05fda7c745a1f7ed1d91c29c5fc196ce9b39aa6e8b43617bac4c", size = 912712, upload-time = "2025-06-22T06:55:47.639Z" }, ] [[package]] @@ -398,11 +399,11 @@ wheels = [ [[package]] name = "certifi" -version = "2025.4.26" +version = "2025.6.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e8/9e/c05b3920a3b7d20d3d3310465f50348e5b3694f4f88c6daf736eef3024c4/certifi-2025.4.26.tar.gz", hash = "sha256:0a816057ea3cdefcef70270d2c515e4506bbc954f417fa5ade2021213bb8f0c6", size = 160705, upload-time = "2025-04-26T02:12:29.51Z" } +sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/7e/3db2bd1b1f9e95f7cddca6d6e75e2f2bd9f51b1246e546d88addca0106bd/certifi-2025.4.26-py3-none-any.whl", hash = "sha256:30350364dfe371162649852c63336a15c70c6510c2ad5015b21c2345311805f3", size = 159618, upload-time = "2025-04-26T02:12:27.662Z" }, + 
{ url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" }, ] [[package]] @@ -590,44 +591,44 @@ wheels = [ [[package]] name = "coverage" -version = "7.8.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/07/998afa4a0ecdf9b1981ae05415dad2d4e7716e1b1f00abbd91691ac09ac9/coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27", size = 812759, upload-time = "2025-05-23T11:39:57.856Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/2a/1da1ada2e3044fcd4a3254fb3576e160b8fe5b36d705c8a31f793423f763/coverage-7.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e2f6fe3654468d061942591aef56686131335b7a8325684eda85dacdf311356c", size = 211876, upload-time = "2025-05-23T11:38:29.01Z" }, - { url = "https://files.pythonhosted.org/packages/70/e9/3d715ffd5b6b17a8be80cd14a8917a002530a99943cc1939ad5bb2aa74b9/coverage-7.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76090fab50610798cc05241bf83b603477c40ee87acd358b66196ab0ca44ffa1", size = 212130, upload-time = "2025-05-23T11:38:30.675Z" }, - { url = "https://files.pythonhosted.org/packages/a0/02/fdce62bb3c21649abfd91fbdcf041fb99be0d728ff00f3f9d54d97ed683e/coverage-7.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2bd0a0a5054be160777a7920b731a0570284db5142abaaf81bcbb282b8d99279", size = 246176, upload-time = "2025-05-23T11:38:32.395Z" }, - { url = "https://files.pythonhosted.org/packages/a7/52/decbbed61e03b6ffe85cd0fea360a5e04a5a98a7423f292aae62423b8557/coverage-7.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:da23ce9a3d356d0affe9c7036030b5c8f14556bd970c9b224f9c8205505e3b99", size = 243068, upload-time = "2025-05-23T11:38:33.989Z" }, - { url = "https://files.pythonhosted.org/packages/38/6c/d0e9c0cce18faef79a52778219a3c6ee8e336437da8eddd4ab3dbd8fadff/coverage-7.8.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9392773cffeb8d7e042a7b15b82a414011e9d2b5fdbbd3f7e6a6b17d5e21b20", size = 245328, upload-time = "2025-05-23T11:38:35.568Z" }, - { url = "https://files.pythonhosted.org/packages/f0/70/f703b553a2f6b6c70568c7e398ed0789d47f953d67fbba36a327714a7bca/coverage-7.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:876cbfd0b09ce09d81585d266c07a32657beb3eaec896f39484b631555be0fe2", size = 245099, upload-time = "2025-05-23T11:38:37.627Z" }, - { url = "https://files.pythonhosted.org/packages/ec/fb/4cbb370dedae78460c3aacbdad9d249e853f3bc4ce5ff0e02b1983d03044/coverage-7.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3da9b771c98977a13fbc3830f6caa85cae6c9c83911d24cb2d218e9394259c57", size = 243314, upload-time = "2025-05-23T11:38:39.238Z" }, - { url = "https://files.pythonhosted.org/packages/39/9f/1afbb2cb9c8699b8bc38afdce00a3b4644904e6a38c7bf9005386c9305ec/coverage-7.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9a990f6510b3292686713bfef26d0049cd63b9c7bb17e0864f133cbfd2e6167f", size = 244489, upload-time = "2025-05-23T11:38:40.845Z" }, - { url = "https://files.pythonhosted.org/packages/79/fa/f3e7ec7d220bff14aba7a4786ae47043770cbdceeea1803083059c878837/coverage-7.8.2-cp312-cp312-win32.whl", hash = "sha256:bf8111cddd0f2b54d34e96613e7fbdd59a673f0cf5574b61134ae75b6f5a33b8", size = 214366, upload-time = "2025-05-23T11:38:43.551Z" }, - { url = "https://files.pythonhosted.org/packages/54/aa/9cbeade19b7e8e853e7ffc261df885d66bf3a782c71cba06c17df271f9e6/coverage-7.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:86a323a275e9e44cdf228af9b71c5030861d4d2610886ab920d9945672a81223", size = 
215165, upload-time = "2025-05-23T11:38:45.148Z" }, - { url = "https://files.pythonhosted.org/packages/c4/73/e2528bf1237d2448f882bbebaec5c3500ef07301816c5c63464b9da4d88a/coverage-7.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:820157de3a589e992689ffcda8639fbabb313b323d26388d02e154164c57b07f", size = 213548, upload-time = "2025-05-23T11:38:46.74Z" }, - { url = "https://files.pythonhosted.org/packages/1a/93/eb6400a745ad3b265bac36e8077fdffcf0268bdbbb6c02b7220b624c9b31/coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca", size = 211898, upload-time = "2025-05-23T11:38:49.066Z" }, - { url = "https://files.pythonhosted.org/packages/1b/7c/bdbf113f92683024406a1cd226a199e4200a2001fc85d6a6e7e299e60253/coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d", size = 212171, upload-time = "2025-05-23T11:38:51.207Z" }, - { url = "https://files.pythonhosted.org/packages/91/22/594513f9541a6b88eb0dba4d5da7d71596dadef6b17a12dc2c0e859818a9/coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85", size = 245564, upload-time = "2025-05-23T11:38:52.857Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f4/2860fd6abeebd9f2efcfe0fd376226938f22afc80c1943f363cd3c28421f/coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257", size = 242719, upload-time = "2025-05-23T11:38:54.529Z" }, - { url = "https://files.pythonhosted.org/packages/89/60/f5f50f61b6332451520e6cdc2401700c48310c64bc2dd34027a47d6ab4ca/coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108", size = 
244634, upload-time = "2025-05-23T11:38:57.326Z" }, - { url = "https://files.pythonhosted.org/packages/3b/70/7f4e919039ab7d944276c446b603eea84da29ebcf20984fb1fdf6e602028/coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0", size = 244824, upload-time = "2025-05-23T11:38:59.421Z" }, - { url = "https://files.pythonhosted.org/packages/26/45/36297a4c0cea4de2b2c442fe32f60c3991056c59cdc3cdd5346fbb995c97/coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050", size = 242872, upload-time = "2025-05-23T11:39:01.049Z" }, - { url = "https://files.pythonhosted.org/packages/a4/71/e041f1b9420f7b786b1367fa2a375703889ef376e0d48de9f5723fb35f11/coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48", size = 244179, upload-time = "2025-05-23T11:39:02.709Z" }, - { url = "https://files.pythonhosted.org/packages/bd/db/3c2bf49bdc9de76acf2491fc03130c4ffc51469ce2f6889d2640eb563d77/coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7", size = 214393, upload-time = "2025-05-23T11:39:05.457Z" }, - { url = "https://files.pythonhosted.org/packages/c6/dc/947e75d47ebbb4b02d8babb1fad4ad381410d5bc9da7cfca80b7565ef401/coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3", size = 215194, upload-time = "2025-05-23T11:39:07.171Z" }, - { url = "https://files.pythonhosted.org/packages/90/31/a980f7df8a37eaf0dc60f932507fda9656b3a03f0abf188474a0ea188d6d/coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7", size = 213580, upload-time = "2025-05-23T11:39:08.862Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/6a/25a37dd90f6c95f59355629417ebcb74e1c34e38bb1eddf6ca9b38b0fc53/coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008", size = 212734, upload-time = "2025-05-23T11:39:11.109Z" }, - { url = "https://files.pythonhosted.org/packages/36/8b/3a728b3118988725f40950931abb09cd7f43b3c740f4640a59f1db60e372/coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36", size = 212959, upload-time = "2025-05-23T11:39:12.751Z" }, - { url = "https://files.pythonhosted.org/packages/53/3c/212d94e6add3a3c3f412d664aee452045ca17a066def8b9421673e9482c4/coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46", size = 257024, upload-time = "2025-05-23T11:39:15.569Z" }, - { url = "https://files.pythonhosted.org/packages/a4/40/afc03f0883b1e51bbe804707aae62e29c4e8c8bbc365c75e3e4ddeee9ead/coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be", size = 252867, upload-time = "2025-05-23T11:39:17.64Z" }, - { url = "https://files.pythonhosted.org/packages/18/a2/3699190e927b9439c6ded4998941a3c1d6fa99e14cb28d8536729537e307/coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740", size = 255096, upload-time = "2025-05-23T11:39:19.328Z" }, - { url = "https://files.pythonhosted.org/packages/b4/06/16e3598b9466456b718eb3e789457d1a5b8bfb22e23b6e8bbc307df5daf0/coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625", size = 256276, upload-time = 
"2025-05-23T11:39:21.077Z" }, - { url = "https://files.pythonhosted.org/packages/a7/d5/4b5a120d5d0223050a53d2783c049c311eea1709fa9de12d1c358e18b707/coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b", size = 254478, upload-time = "2025-05-23T11:39:22.838Z" }, - { url = "https://files.pythonhosted.org/packages/ba/85/f9ecdb910ecdb282b121bfcaa32fa8ee8cbd7699f83330ee13ff9bbf1a85/coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199", size = 255255, upload-time = "2025-05-23T11:39:24.644Z" }, - { url = "https://files.pythonhosted.org/packages/50/63/2d624ac7d7ccd4ebbd3c6a9eba9d7fc4491a1226071360d59dd84928ccb2/coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8", size = 215109, upload-time = "2025-05-23T11:39:26.722Z" }, - { url = "https://files.pythonhosted.org/packages/22/5e/7053b71462e970e869111c1853afd642212568a350eba796deefdfbd0770/coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d", size = 216268, upload-time = "2025-05-23T11:39:28.429Z" }, - { url = "https://files.pythonhosted.org/packages/07/69/afa41aa34147655543dbe96994f8a246daf94b361ccf5edfd5df62ce066a/coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b", size = 214071, upload-time = "2025-05-23T11:39:30.55Z" }, - { url = "https://files.pythonhosted.org/packages/a0/1a/0b9c32220ad694d66062f571cc5cedfa9997b64a591e8a500bb63de1bd40/coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32", size = 203623, upload-time = "2025-05-23T11:39:53.846Z" }, +version = "7.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/e7/e0/98670a80884f64578f0c22cd70c5e81a6e07b08167721c7487b4d70a7ca0/coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec", size = 813650, upload-time = "2025-06-13T13:02:28.627Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/d9/7f66eb0a8f2fce222de7bdc2046ec41cb31fe33fb55a330037833fb88afc/coverage-7.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8de12b4b87c20de895f10567639c0797b621b22897b0af3ce4b4e204a743626", size = 212336, upload-time = "2025-06-13T13:01:10.909Z" }, + { url = "https://files.pythonhosted.org/packages/20/20/e07cb920ef3addf20f052ee3d54906e57407b6aeee3227a9c91eea38a665/coverage-7.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5add197315a054e92cee1b5f686a2bcba60c4c3e66ee3de77ace6c867bdee7cb", size = 212571, upload-time = "2025-06-13T13:01:12.518Z" }, + { url = "https://files.pythonhosted.org/packages/78/f8/96f155de7e9e248ca9c8ff1a40a521d944ba48bec65352da9be2463745bf/coverage-7.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600a1d4106fe66f41e5d0136dfbc68fe7200a5cbe85610ddf094f8f22e1b0300", size = 246377, upload-time = "2025-06-13T13:01:14.87Z" }, + { url = "https://files.pythonhosted.org/packages/3e/cf/1d783bd05b7bca5c10ded5f946068909372e94615a4416afadfe3f63492d/coverage-7.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a876e4c3e5a2a1715a6608906aa5a2e0475b9c0f68343c2ada98110512ab1d8", size = 243394, upload-time = "2025-06-13T13:01:16.23Z" }, + { url = "https://files.pythonhosted.org/packages/02/dd/e7b20afd35b0a1abea09fb3998e1abc9f9bd953bee548f235aebd2b11401/coverage-7.9.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81f34346dd63010453922c8e628a52ea2d2ccd73cb2487f7700ac531b247c8a5", size = 245586, upload-time = "2025-06-13T13:01:17.532Z" }, + { url = 
"https://files.pythonhosted.org/packages/4e/38/b30b0006fea9d617d1cb8e43b1bc9a96af11eff42b87eb8c716cf4d37469/coverage-7.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:888f8eee13f2377ce86d44f338968eedec3291876b0b8a7289247ba52cb984cd", size = 245396, upload-time = "2025-06-13T13:01:19.164Z" }, + { url = "https://files.pythonhosted.org/packages/31/e4/4d8ec1dc826e16791f3daf1b50943e8e7e1eb70e8efa7abb03936ff48418/coverage-7.9.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9969ef1e69b8c8e1e70d591f91bbc37fc9a3621e447525d1602801a24ceda898", size = 243577, upload-time = "2025-06-13T13:01:22.433Z" }, + { url = "https://files.pythonhosted.org/packages/25/f4/b0e96c5c38e6e40ef465c4bc7f138863e2909c00e54a331da335faf0d81a/coverage-7.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:60c458224331ee3f1a5b472773e4a085cc27a86a0b48205409d364272d67140d", size = 244809, upload-time = "2025-06-13T13:01:24.143Z" }, + { url = "https://files.pythonhosted.org/packages/8a/65/27e0a1fa5e2e5079bdca4521be2f5dabf516f94e29a0defed35ac2382eb2/coverage-7.9.1-cp312-cp312-win32.whl", hash = "sha256:5f646a99a8c2b3ff4c6a6e081f78fad0dde275cd59f8f49dc4eab2e394332e74", size = 214724, upload-time = "2025-06-13T13:01:25.435Z" }, + { url = "https://files.pythonhosted.org/packages/9b/a8/d5b128633fd1a5e0401a4160d02fa15986209a9e47717174f99dc2f7166d/coverage-7.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:30f445f85c353090b83e552dcbbdad3ec84c7967e108c3ae54556ca69955563e", size = 215535, upload-time = "2025-06-13T13:01:27.861Z" }, + { url = "https://files.pythonhosted.org/packages/a3/37/84bba9d2afabc3611f3e4325ee2c6a47cd449b580d4a606b240ce5a6f9bf/coverage-7.9.1-cp312-cp312-win_arm64.whl", hash = "sha256:af41da5dca398d3474129c58cb2b106a5d93bbb196be0d307ac82311ca234342", size = 213904, upload-time = "2025-06-13T13:01:29.202Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/a7/a027970c991ca90f24e968999f7d509332daf6b8c3533d68633930aaebac/coverage-7.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:31324f18d5969feef7344a932c32428a2d1a3e50b15a6404e97cba1cc9b2c631", size = 212358, upload-time = "2025-06-13T13:01:30.909Z" }, + { url = "https://files.pythonhosted.org/packages/f2/48/6aaed3651ae83b231556750280682528fea8ac7f1232834573472d83e459/coverage-7.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0c804506d624e8a20fb3108764c52e0eef664e29d21692afa375e0dd98dc384f", size = 212620, upload-time = "2025-06-13T13:01:32.256Z" }, + { url = "https://files.pythonhosted.org/packages/6c/2a/f4b613f3b44d8b9f144847c89151992b2b6b79cbc506dee89ad0c35f209d/coverage-7.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef64c27bc40189f36fcc50c3fb8f16ccda73b6a0b80d9bd6e6ce4cffcd810bbd", size = 245788, upload-time = "2025-06-13T13:01:33.948Z" }, + { url = "https://files.pythonhosted.org/packages/04/d2/de4fdc03af5e4e035ef420ed26a703c6ad3d7a07aff2e959eb84e3b19ca8/coverage-7.9.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4fe2348cc6ec372e25adec0219ee2334a68d2f5222e0cba9c0d613394e12d86", size = 243001, upload-time = "2025-06-13T13:01:35.285Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e8/eed18aa5583b0423ab7f04e34659e51101135c41cd1dcb33ac1d7013a6d6/coverage-7.9.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34ed2186fe52fcc24d4561041979a0dec69adae7bce2ae8d1c49eace13e55c43", size = 244985, upload-time = "2025-06-13T13:01:36.712Z" }, + { url = "https://files.pythonhosted.org/packages/17/f8/ae9e5cce8885728c934eaa58ebfa8281d488ef2afa81c3dbc8ee9e6d80db/coverage-7.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:25308bd3d00d5eedd5ae7d4357161f4df743e3c0240fa773ee1b0f75e6c7c0f1", size = 245152, upload-time = "2025-06-13T13:01:39.303Z" 
}, + { url = "https://files.pythonhosted.org/packages/5a/c8/272c01ae792bb3af9b30fac14d71d63371db227980682836ec388e2c57c0/coverage-7.9.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73e9439310f65d55a5a1e0564b48e34f5369bee943d72c88378f2d576f5a5751", size = 243123, upload-time = "2025-06-13T13:01:40.727Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d0/2819a1e3086143c094ab446e3bdf07138527a7b88cb235c488e78150ba7a/coverage-7.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:37ab6be0859141b53aa89412a82454b482c81cf750de4f29223d52268a86de67", size = 244506, upload-time = "2025-06-13T13:01:42.184Z" }, + { url = "https://files.pythonhosted.org/packages/8b/4e/9f6117b89152df7b6112f65c7a4ed1f2f5ec8e60c4be8f351d91e7acc848/coverage-7.9.1-cp313-cp313-win32.whl", hash = "sha256:64bdd969456e2d02a8b08aa047a92d269c7ac1f47e0c977675d550c9a0863643", size = 214766, upload-time = "2025-06-13T13:01:44.482Z" }, + { url = "https://files.pythonhosted.org/packages/27/0f/4b59f7c93b52c2c4ce7387c5a4e135e49891bb3b7408dcc98fe44033bbe0/coverage-7.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:be9e3f68ca9edb897c2184ad0eee815c635565dbe7a0e7e814dc1f7cbab92c0a", size = 215568, upload-time = "2025-06-13T13:01:45.772Z" }, + { url = "https://files.pythonhosted.org/packages/09/1e/9679826336f8c67b9c39a359352882b24a8a7aee48d4c9cad08d38d7510f/coverage-7.9.1-cp313-cp313-win_arm64.whl", hash = "sha256:1c503289ffef1d5105d91bbb4d62cbe4b14bec4d13ca225f9c73cde9bb46207d", size = 213939, upload-time = "2025-06-13T13:01:47.087Z" }, + { url = "https://files.pythonhosted.org/packages/bb/5b/5c6b4e7a407359a2e3b27bf9c8a7b658127975def62077d441b93a30dbe8/coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0", size = 213079, upload-time = "2025-06-13T13:01:48.554Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/22/1e2e07279fd2fd97ae26c01cc2186e2258850e9ec125ae87184225662e89/coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d", size = 213299, upload-time = "2025-06-13T13:01:49.997Z" }, + { url = "https://files.pythonhosted.org/packages/14/c0/4c5125a4b69d66b8c85986d3321520f628756cf524af810baab0790c7647/coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f", size = 256535, upload-time = "2025-06-13T13:01:51.314Z" }, + { url = "https://files.pythonhosted.org/packages/81/8b/e36a04889dda9960be4263e95e777e7b46f1bb4fc32202612c130a20c4da/coverage-7.9.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb5838701ca68b10ebc0937dbd0eb81974bac54447c55cd58dea5bca8451029", size = 252756, upload-time = "2025-06-13T13:01:54.403Z" }, + { url = "https://files.pythonhosted.org/packages/98/82/be04eff8083a09a4622ecd0e1f31a2c563dbea3ed848069e7b0445043a70/coverage-7.9.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a25f814591a8c0c5372c11ac8967f669b97444c47fd794926e175c4047ece", size = 254912, upload-time = "2025-06-13T13:01:56.769Z" }, + { url = "https://files.pythonhosted.org/packages/0f/25/c26610a2c7f018508a5ab958e5b3202d900422cf7cdca7670b6b8ca4e8df/coverage-7.9.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2d04b16a6062516df97969f1ae7efd0de9c31eb6ebdceaa0d213b21c0ca1a683", size = 256144, upload-time = "2025-06-13T13:01:58.19Z" }, + { url = "https://files.pythonhosted.org/packages/c5/8b/fb9425c4684066c79e863f1e6e7ecebb49e3a64d9f7f7860ef1688c56f4a/coverage-7.9.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7931b9e249edefb07cd6ae10c702788546341d5fe44db5b6108a25da4dca513f", size = 254257, upload-time = 
"2025-06-13T13:01:59.645Z" }, + { url = "https://files.pythonhosted.org/packages/93/df/27b882f54157fc1131e0e215b0da3b8d608d9b8ef79a045280118a8f98fe/coverage-7.9.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52e92b01041151bf607ee858e5a56c62d4b70f4dac85b8c8cb7fb8a351ab2c10", size = 255094, upload-time = "2025-06-13T13:02:01.37Z" }, + { url = "https://files.pythonhosted.org/packages/41/5f/cad1c3dbed8b3ee9e16fa832afe365b4e3eeab1fb6edb65ebbf745eabc92/coverage-7.9.1-cp313-cp313t-win32.whl", hash = "sha256:684e2110ed84fd1ca5f40e89aa44adf1729dc85444004111aa01866507adf363", size = 215437, upload-time = "2025-06-13T13:02:02.905Z" }, + { url = "https://files.pythonhosted.org/packages/99/4d/fad293bf081c0e43331ca745ff63673badc20afea2104b431cdd8c278b4c/coverage-7.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:437c576979e4db840539674e68c84b3cda82bc824dd138d56bead1435f1cb5d7", size = 216605, upload-time = "2025-06-13T13:02:05.638Z" }, + { url = "https://files.pythonhosted.org/packages/1f/56/4ee027d5965fc7fc126d7ec1187529cc30cc7d740846e1ecb5e92d31b224/coverage-7.9.1-cp313-cp313t-win_arm64.whl", hash = "sha256:18a0912944d70aaf5f399e350445738a1a20b50fbea788f640751c2ed9208b6c", size = 214392, upload-time = "2025-06-13T13:02:07.642Z" }, + { url = "https://files.pythonhosted.org/packages/08/b8/7ddd1e8ba9701dea08ce22029917140e6f66a859427406579fd8d0ca7274/coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c", size = 204000, upload-time = "2025-06-13T13:02:27.173Z" }, ] [[package]] @@ -934,16 +935,16 @@ wheels = [ [[package]] name = "fastapi" -version = "0.115.12" +version = "0.115.13" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/55/ae499352d82338331ca1e28c7f4a63bfd09479b16395dce38cf50a39e2c2/fastapi-0.115.12.tar.gz", hash = 
"sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681", size = 295236, upload-time = "2025-03-23T22:55:43.822Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/64/ec0788201b5554e2a87c49af26b77a4d132f807a0fa9675257ac92c6aa0e/fastapi-0.115.13.tar.gz", hash = "sha256:55d1d25c2e1e0a0a50aceb1c8705cd932def273c102bff0b1c1da88b3c6eb307", size = 295680, upload-time = "2025-06-17T11:49:45.575Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/b3/b51f09c2ba432a576fe63758bddc81f78f0c6309d9e5c10d194313bf021e/fastapi-0.115.12-py3-none-any.whl", hash = "sha256:e94613d6c05e27be7ffebdd6ea5f388112e5e430c8f7d6494a9d1d88d43e814d", size = 95164, upload-time = "2025-03-23T22:55:42.101Z" }, + { url = "https://files.pythonhosted.org/packages/59/4a/e17764385382062b0edbb35a26b7cf76d71e27e456546277a42ba6545c6e/fastapi-0.115.13-py3-none-any.whl", hash = "sha256:0a0cab59afa7bab22f5eb347f8c9864b681558c278395e94035a741fc10cd865", size = 95315, upload-time = "2025-06-17T11:49:44.106Z" }, ] [package.optional-dependencies] @@ -1017,6 +1018,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, ] +[[package]] +name = "flash-attn" +version = "2.7.4.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "einops" }, + { name = "ninja" }, + { name = "psutil" }, + { name = "setuptools" }, + { name = "torch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/11/34/9bf60e736ed7bbe15055ac2dab48ec67d9dbd088d2b4ae318fd77190ab4e/flash_attn-2.7.4.post1.tar.gz", hash = "sha256:f03485c9a49a4d68d0733acdcad80ab0e72afa025a777fdc2966ceccf9d51765", size = 5986610, upload-time = "2025-01-30T06:39:51.93Z" } + [[package]] name = "flask" version = "3.1.1" @@ -1051,27 +1065,27 @@ 
wheels = [ [[package]] name = "fonttools" -version = "4.58.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b6/a9/3319c6ae07fd9dde51064ddc6d82a2b707efad8ed407d700a01091121bbc/fonttools-4.58.2.tar.gz", hash = "sha256:4b491ddbfd50b856e84b0648b5f7941af918f6d32f938f18e62b58426a8d50e2", size = 3524285, upload-time = "2025-06-06T14:50:58.643Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/68/7ec64584dc592faf944d540307c3562cd893256c48bb028c90de489e4750/fonttools-4.58.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c6eeaed9c54c1d33c1db928eb92b4e180c7cb93b50b1ee3e79b2395cb01f25e9", size = 2741645, upload-time = "2025-06-06T14:50:08.706Z" }, - { url = "https://files.pythonhosted.org/packages/8f/0c/b327838f63baa7ebdd6db3ffdf5aff638e883f9236d928be4f32c692e1bd/fonttools-4.58.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bbe1d9c72b7f981bed5c2a61443d5e3127c1b3aca28ca76386d1ad93268a803f", size = 2311100, upload-time = "2025-06-06T14:50:10.401Z" }, - { url = "https://files.pythonhosted.org/packages/ae/c7/dec024a1c873c79a4db98fe0104755fa62ec2b4518e09d6fda28246c3c9b/fonttools-4.58.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85babe5b3ce2cbe57fc0d09c0ee92bbd4d594fd7ea46a65eb43510a74a4ce773", size = 4815841, upload-time = "2025-06-06T14:50:12.496Z" }, - { url = "https://files.pythonhosted.org/packages/94/33/57c81abad641d6ec9c8b06c99cd28d687cb4849efb6168625b5c6b8f9fa4/fonttools-4.58.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:918a2854537fcdc662938057ad58b633bc9e0698f04a2f4894258213283a7932", size = 4882659, upload-time = "2025-06-06T14:50:14.361Z" }, - { url = "https://files.pythonhosted.org/packages/a5/37/2f8faa2bf8bd1ba016ea86a94c72a5e8ef8ea1c52ec64dada617191f0515/fonttools-4.58.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:3b379cf05bf776c336a0205632596b1c7d7ab5f7135e3935f2ca2a0596d2d092", size = 4876128, upload-time = "2025-06-06T14:50:16.653Z" }, - { url = "https://files.pythonhosted.org/packages/a0/ca/f1caac24ae7028a33f2a95e66c640571ff0ce5cb06c4c9ca1f632e98e22c/fonttools-4.58.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:99ab3547a15a5d168c265e139e21756bbae1de04782ac9445c9ef61b8c0a32ce", size = 5027843, upload-time = "2025-06-06T14:50:18.582Z" }, - { url = "https://files.pythonhosted.org/packages/52/6e/3200fa2bafeed748a3017e4e6594751fd50cce544270919265451b21b75c/fonttools-4.58.2-cp312-cp312-win32.whl", hash = "sha256:6764e7a3188ce36eea37b477cdeca602ae62e63ae9fc768ebc176518072deb04", size = 2177374, upload-time = "2025-06-06T14:50:20.454Z" }, - { url = "https://files.pythonhosted.org/packages/55/ab/8f3e726f3f3ef3062ce9bbb615727c55beb11eea96d1f443f79cafca93ee/fonttools-4.58.2-cp312-cp312-win_amd64.whl", hash = "sha256:41f02182a1d41b79bae93c1551855146868b04ec3e7f9c57d6fef41a124e6b29", size = 2226685, upload-time = "2025-06-06T14:50:22.087Z" }, - { url = "https://files.pythonhosted.org/packages/ac/01/29f81970a508408af20b434ff5136cd1c7ef92198957eb8ddadfbb9ef177/fonttools-4.58.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:829048ef29dbefec35d95cc6811014720371c95bdc6ceb0afd2f8e407c41697c", size = 2732398, upload-time = "2025-06-06T14:50:23.821Z" }, - { url = "https://files.pythonhosted.org/packages/0c/f1/095f2338359333adb2f1c51b8b2ad94bf9a2fa17e5fcbdf8a7b8e3672d2d/fonttools-4.58.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:64998c5993431e45b474ed5f579f18555f45309dd1cf8008b594d2fe0a94be59", size = 2306390, upload-time = "2025-06-06T14:50:25.942Z" }, - { url = "https://files.pythonhosted.org/packages/bf/d4/9eba134c7666a26668c28945355cd86e5d57828b6b8d952a5489fe45d7e2/fonttools-4.58.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b887a1cf9fbcb920980460ee4a489c8aba7e81341f6cdaeefa08c0ab6529591c", size = 4795100, upload-time 
= "2025-06-06T14:50:27.653Z" }, - { url = "https://files.pythonhosted.org/packages/2a/34/345f153a24c1340daa62340c3be2d1e5ee6c1ee57e13f6d15613209e688b/fonttools-4.58.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27d74b9f6970cefbcda33609a3bee1618e5e57176c8b972134c4e22461b9c791", size = 4864585, upload-time = "2025-06-06T14:50:29.915Z" }, - { url = "https://files.pythonhosted.org/packages/01/5f/091979a25c9a6c4ba064716cfdfe9431f78ed6ffba4bd05ae01eee3532e9/fonttools-4.58.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec26784610056a770e15a60f9920cee26ae10d44d1e43271ea652dadf4e7a236", size = 4866191, upload-time = "2025-06-06T14:50:32.188Z" }, - { url = "https://files.pythonhosted.org/packages/9d/09/3944d0ece4a39560918cba37c2e0453a5f826b665a6db0b43abbd9dbe7e1/fonttools-4.58.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ed0a71d57dd427c0fb89febd08cac9b925284d2a8888e982a6c04714b82698d7", size = 5003867, upload-time = "2025-06-06T14:50:34.323Z" }, - { url = "https://files.pythonhosted.org/packages/68/97/190b8f9ba22f8b7d07df2faa9fd7087b453776d0705d3cb5b0cbd89b8ef0/fonttools-4.58.2-cp313-cp313-win32.whl", hash = "sha256:994e362b01460aa863ef0cb41a29880bc1a498c546952df465deff7abf75587a", size = 2175688, upload-time = "2025-06-06T14:50:36.211Z" }, - { url = "https://files.pythonhosted.org/packages/94/ea/0e6d4a39528dbb6e0f908c2ad219975be0a506ed440fddf5453b90f76981/fonttools-4.58.2-cp313-cp313-win_amd64.whl", hash = "sha256:f95dec862d7c395f2d4efe0535d9bdaf1e3811e51b86432fa2a77e73f8195756", size = 2226464, upload-time = "2025-06-06T14:50:38.862Z" }, - { url = "https://files.pythonhosted.org/packages/e8/e5/c1cb8ebabb80be76d4d28995da9416816653f8f572920ab5e3d2e3ac8285/fonttools-4.58.2-py3-none-any.whl", hash = "sha256:84f4b0bcfa046254a65ee7117094b4907e22dc98097a220ef108030eb3c15596", size = 1114597, upload-time = "2025-06-06T14:50:56.619Z" }, +version = "4.58.4" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/5a/1124b2c8cb3a8015faf552e92714040bcdbc145dfa29928891b02d147a18/fonttools-4.58.4.tar.gz", hash = "sha256:928a8009b9884ed3aae17724b960987575155ca23c6f0b8146e400cc9e0d44ba", size = 3525026, upload-time = "2025-06-13T17:25:15.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/3c/1d1792bfe91ef46f22a3d23b4deb514c325e73c17d4f196b385b5e2faf1c/fonttools-4.58.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:462211c0f37a278494e74267a994f6be9a2023d0557aaa9ecbcbfce0f403b5a6", size = 2754082, upload-time = "2025-06-13T17:24:24.862Z" }, + { url = "https://files.pythonhosted.org/packages/2a/1f/2b261689c901a1c3bc57a6690b0b9fc21a9a93a8b0c83aae911d3149f34e/fonttools-4.58.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0c7a12fb6f769165547f00fcaa8d0df9517603ae7e04b625e5acb8639809b82d", size = 2321677, upload-time = "2025-06-13T17:24:26.815Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6b/4607add1755a1e6581ae1fc0c9a640648e0d9cdd6591cc2d581c2e07b8c3/fonttools-4.58.4-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2d42c63020a922154add0a326388a60a55504629edc3274bc273cd3806b4659f", size = 4896354, upload-time = "2025-06-13T17:24:28.428Z" }, + { url = "https://files.pythonhosted.org/packages/cd/95/34b4f483643d0cb11a1f830b72c03fdd18dbd3792d77a2eb2e130a96fada/fonttools-4.58.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f2b4e6fd45edc6805f5f2c355590b092ffc7e10a945bd6a569fc66c1d2ae7aa", size = 4941633, upload-time = "2025-06-13T17:24:30.568Z" }, + { url = "https://files.pythonhosted.org/packages/81/ac/9bafbdb7694059c960de523e643fa5a61dd2f698f3f72c0ca18ae99257c7/fonttools-4.58.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f155b927f6efb1213a79334e4cb9904d1e18973376ffc17a0d7cd43d31981f1e", size = 4886170, upload-time = 
"2025-06-13T17:24:32.724Z" }, + { url = "https://files.pythonhosted.org/packages/ae/44/a3a3b70d5709405f7525bb7cb497b4e46151e0c02e3c8a0e40e5e9fe030b/fonttools-4.58.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e38f687d5de97c7fb7da3e58169fb5ba349e464e141f83c3c2e2beb91d317816", size = 5037851, upload-time = "2025-06-13T17:24:35.034Z" }, + { url = "https://files.pythonhosted.org/packages/21/cb/e8923d197c78969454eb876a4a55a07b59c9c4c46598f02b02411dc3b45c/fonttools-4.58.4-cp312-cp312-win32.whl", hash = "sha256:636c073b4da9db053aa683db99580cac0f7c213a953b678f69acbca3443c12cc", size = 2187428, upload-time = "2025-06-13T17:24:36.996Z" }, + { url = "https://files.pythonhosted.org/packages/46/e6/fe50183b1a0e1018e7487ee740fa8bb127b9f5075a41e20d017201e8ab14/fonttools-4.58.4-cp312-cp312-win_amd64.whl", hash = "sha256:82e8470535743409b30913ba2822e20077acf9ea70acec40b10fcf5671dceb58", size = 2236649, upload-time = "2025-06-13T17:24:38.985Z" }, + { url = "https://files.pythonhosted.org/packages/d4/4f/c05cab5fc1a4293e6bc535c6cb272607155a0517700f5418a4165b7f9ec8/fonttools-4.58.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5f4a64846495c543796fa59b90b7a7a9dff6839bd852741ab35a71994d685c6d", size = 2745197, upload-time = "2025-06-13T17:24:40.645Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d3/49211b1f96ae49308f4f78ca7664742377a6867f00f704cdb31b57e4b432/fonttools-4.58.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e80661793a5d4d7ad132a2aa1eae2e160fbdbb50831a0edf37c7c63b2ed36574", size = 2317272, upload-time = "2025-06-13T17:24:43.428Z" }, + { url = "https://files.pythonhosted.org/packages/b2/11/c9972e46a6abd752a40a46960e431c795ad1f306775fc1f9e8c3081a1274/fonttools-4.58.4-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fe5807fc64e4ba5130f1974c045a6e8d795f3b7fb6debfa511d1773290dbb76b", size = 4877184, upload-time = "2025-06-13T17:24:45.527Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/24/5017c01c9ef8df572cc9eaf9f12be83ad8ed722ff6dc67991d3d752956e4/fonttools-4.58.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b610b9bef841cb8f4b50472494158b1e347d15cad56eac414c722eda695a6cfd", size = 4939445, upload-time = "2025-06-13T17:24:47.647Z" }, + { url = "https://files.pythonhosted.org/packages/79/b0/538cc4d0284b5a8826b4abed93a69db52e358525d4b55c47c8cef3669767/fonttools-4.58.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2daa7f0e213c38f05f054eb5e1730bd0424aebddbeac094489ea1585807dd187", size = 4878800, upload-time = "2025-06-13T17:24:49.766Z" }, + { url = "https://files.pythonhosted.org/packages/5a/9b/a891446b7a8250e65bffceb248508587958a94db467ffd33972723ab86c9/fonttools-4.58.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:66cccb6c0b944496b7f26450e9a66e997739c513ffaac728d24930df2fd9d35b", size = 5021259, upload-time = "2025-06-13T17:24:51.754Z" }, + { url = "https://files.pythonhosted.org/packages/17/b2/c4d2872cff3ace3ddd1388bf15b76a1d8d5313f0a61f234e9aed287e674d/fonttools-4.58.4-cp313-cp313-win32.whl", hash = "sha256:94d2aebb5ca59a5107825520fde596e344652c1f18170ef01dacbe48fa60c889", size = 2185824, upload-time = "2025-06-13T17:24:54.324Z" }, + { url = "https://files.pythonhosted.org/packages/98/57/cddf8bcc911d4f47dfca1956c1e3aeeb9f7c9b8e88b2a312fe8c22714e0b/fonttools-4.58.4-cp313-cp313-win_amd64.whl", hash = "sha256:b554bd6e80bba582fd326ddab296e563c20c64dca816d5e30489760e0c41529f", size = 2236382, upload-time = "2025-06-13T17:24:56.291Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2f/c536b5b9bb3c071e91d536a4d11f969e911dbb6b227939f4c5b0bca090df/fonttools-4.58.4-py3-none-any.whl", hash = "sha256:a10ce13a13f26cbb9f37512a4346bb437ad7e002ff6fa966a7ce7ff5ac3528bd", size = 1114660, upload-time = "2025-06-13T17:25:13.321Z" }, ] [[package]] @@ -1150,17 +1164,16 @@ http = [ [[package]] name = "gguf" -version = "0.17.0" +version = 
"0.17.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pyyaml" }, - { name = "sentencepiece" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/10/33f386df10ca5b65f0f336c54affd047ace12975eebbe308f9b0eb2d9c36/gguf-0.17.0.tar.gz", hash = "sha256:e3f88278e6f6778e0348fbc97313a4a2f8af63b08fe25dc381251d9c611dae03", size = 88950, upload-time = "2025-05-29T13:44:05.595Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/08/7de1ca4b71e7bf33b547f82bb22505e221b5fa42f67d635e200e0ad22ad6/gguf-0.17.1.tar.gz", hash = "sha256:36ad71aad900a3e75fc94ebe96ea6029f03a4e44be7627ef7ad3d03e8c7bcb53", size = 89338, upload-time = "2025-06-19T14:00:33.705Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/5f/0a703bd54e78ddaffcadcb0c5f903236757f993c536bae53dd8600404e94/gguf-0.17.0-py3-none-any.whl", hash = "sha256:52f2759c6e0ab3d228d4d44f871e3eb140004712c31aed72e2ae82f61aa5aa05", size = 95798, upload-time = "2025-05-29T13:44:04.005Z" }, + { url = "https://files.pythonhosted.org/packages/fc/31/6a93a887617ee7deeaa602ca3d02d1c12a6cb8a742a695de5d128f5fa46a/gguf-0.17.1-py3-none-any.whl", hash = "sha256:7bc5aa7eeb1931f7d39b48fdc5b38fda6b294b9dca75cf607ac69557840a3943", size = 96224, upload-time = "2025-06-19T14:00:32.88Z" }, ] [[package]] @@ -1189,7 +1202,7 @@ wheels = [ [[package]] name = "google-api-core" -version = "2.25.0" +version = "2.25.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-auth" }, @@ -1198,9 +1211,9 @@ dependencies = [ { name = "protobuf" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/98/a2/8176b416ca08106b2ae30cd4a006c8176945f682c3a5b42f141c9173f505/google_api_core-2.25.0.tar.gz", hash = "sha256:9b548e688702f82a34ed8409fb8a6961166f0b7795032f0be8f48308dff4333a", size = 164914, upload-time = "2025-06-02T14:45:34.789Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/21/e9d043e88222317afdbdb567165fdbc3b0aad90064c7e0c9eb0ad9955ad8/google_api_core-2.25.1.tar.gz", hash = "sha256:d2aaa0b13c78c61cb3f4282c464c046e45fbd75755683c9c525e6e8f7ed0a5e8", size = 165443, upload-time = "2025-06-12T20:52:20.439Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/ca/149e41a277bb0855e8ded85fd7579d7747c1223e253d82c5c0f1be236875/google_api_core-2.25.0-py3-none-any.whl", hash = "sha256:1db79d1281dcf9f3d10023283299ba38f3dc9f639ec41085968fd23e5bcf512e", size = 160668, upload-time = "2025-06-02T14:45:33.272Z" }, + { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = "sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807, upload-time = "2025-06-12T20:52:19.334Z" }, ] [[package]] @@ -1231,11 +1244,11 @@ wheels = [ [[package]] name = "graphviz" -version = "0.20.3" +version = "0.21" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fa/83/5a40d19b8347f017e417710907f824915fba411a9befd092e52746b63e9f/graphviz-0.20.3.zip", hash = "sha256:09d6bc81e6a9fa392e7ba52135a9d49f1ed62526f96499325930e87ca1b5925d", size = 256455, upload-time = "2024-03-21T07:50:45.772Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/be/d59db2d1d52697c6adc9eacaf50e8965b6345cc143f671e1ed068818d5cf/graphviz-0.20.3-py3-none-any.whl", hash = "sha256:81f848f2904515d8cd359cc611faba817598d2feaac4027b266aa3eda7b3dde5", size = 47126, upload-time = "2024-03-21T07:50:43.091Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, ] [[package]] @@ -1313,17 +1326,17 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.1.3" +version = "1.1.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/75/dc/dc091aeeb671e71cbec30e84963f9c0202c17337b24b0a800e7d205543e8/hf_xet-1.1.3.tar.gz", hash = "sha256:a5f09b1dd24e6ff6bcedb4b0ddab2d81824098bb002cf8b4ffa780545fa348c3", size = 488127, upload-time = "2025-06-04T00:47:27.456Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/d4/7685999e85945ed0d7f0762b686ae7015035390de1161dcea9d5276c134c/hf_xet-1.1.5.tar.gz", hash = "sha256:69ebbcfd9ec44fdc2af73441619eeb06b94ee34511bbcf57cd423820090f5694", size = 495969, upload-time = "2025-06-20T21:48:38.007Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/1f/bc01a4c0894973adebbcd4aa338a06815c76333ebb3921d94dcbd40dae6a/hf_xet-1.1.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c3b508b5f583a75641aebf732853deb058953370ce8184f5dabc49f803b0819b", size = 2256929, upload-time = "2025-06-04T00:47:21.206Z" }, - { url = "https://files.pythonhosted.org/packages/78/07/6ef50851b5c6b45b77a6e018fa299c69a2db3b8bbd0d5af594c0238b1ceb/hf_xet-1.1.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:b788a61977fbe6b5186e66239e2a329a3f0b7e7ff50dad38984c0c74f44aeca1", size = 2153719, upload-time = "2025-06-04T00:47:19.302Z" }, - { url = "https://files.pythonhosted.org/packages/52/48/e929e6e3db6e4758c2adf0f2ca2c59287f1b76229d8bdc1a4c9cfc05212e/hf_xet-1.1.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd2da210856444a34aad8ada2fc12f70dabed7cc20f37e90754d1d9b43bc0534", size = 4820519, upload-time = "2025-06-04T00:47:17.244Z" }, - { url = 
"https://files.pythonhosted.org/packages/28/2e/03f89c5014a5aafaa9b150655f811798a317036646623bdaace25f485ae8/hf_xet-1.1.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8203f52827e3df65981984936654a5b390566336956f65765a8aa58c362bb841", size = 4964121, upload-time = "2025-06-04T00:47:15.17Z" }, - { url = "https://files.pythonhosted.org/packages/47/8b/5cd399a92b47d98086f55fc72d69bc9ea5e5c6f27a9ed3e0cdd6be4e58a3/hf_xet-1.1.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:30c575a5306f8e6fda37edb866762140a435037365eba7a17ce7bd0bc0216a8b", size = 5283017, upload-time = "2025-06-04T00:47:23.239Z" }, - { url = "https://files.pythonhosted.org/packages/53/e3/2fcec58d2fcfd25ff07feb876f466cfa11f8dcf9d3b742c07fe9dd51ee0a/hf_xet-1.1.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c1a6aa6abed1f696f8099aa9796ca04c9ee778a58728a115607de9cc4638ff1", size = 4970349, upload-time = "2025-06-04T00:47:25.383Z" }, - { url = "https://files.pythonhosted.org/packages/53/bf/10ca917e335861101017ff46044c90e517b574fbb37219347b83be1952f6/hf_xet-1.1.3-cp37-abi3-win_amd64.whl", hash = "sha256:b578ae5ac9c056296bb0df9d018e597c8dc6390c5266f35b5c44696003cde9f3", size = 2310934, upload-time = "2025-06-04T00:47:29.632Z" }, + { url = "https://files.pythonhosted.org/packages/00/89/a1119eebe2836cb25758e7661d6410d3eae982e2b5e974bcc4d250be9012/hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23", size = 2687929, upload-time = "2025-06-20T21:48:32.284Z" }, + { url = "https://files.pythonhosted.org/packages/de/5f/2c78e28f309396e71ec8e4e9304a6483dcbc36172b5cea8f291994163425/hf_xet-1.1.5-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9fa6e3ee5d61912c4a113e0708eaaef987047616465ac7aa30f7121a48fc1af8", size = 2556338, upload-time = "2025-06-20T21:48:30.079Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/2f/6cad7b5fe86b7652579346cb7f85156c11761df26435651cbba89376cd2c/hf_xet-1.1.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc874b5c843e642f45fd85cda1ce599e123308ad2901ead23d3510a47ff506d1", size = 3102894, upload-time = "2025-06-20T21:48:28.114Z" }, + { url = "https://files.pythonhosted.org/packages/d0/54/0fcf2b619720a26fbb6cc941e89f2472a522cd963a776c089b189559447f/hf_xet-1.1.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dbba1660e5d810bd0ea77c511a99e9242d920790d0e63c0e4673ed36c4022d18", size = 3002134, upload-time = "2025-06-20T21:48:25.906Z" }, + { url = "https://files.pythonhosted.org/packages/f3/92/1d351ac6cef7c4ba8c85744d37ffbfac2d53d0a6c04d2cabeba614640a78/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ab34c4c3104133c495785d5d8bba3b1efc99de52c02e759cf711a91fd39d3a14", size = 3171009, upload-time = "2025-06-20T21:48:33.987Z" }, + { url = "https://files.pythonhosted.org/packages/c9/65/4b2ddb0e3e983f2508528eb4501288ae2f84963586fbdfae596836d5e57a/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:83088ecea236d5113de478acb2339f92c95b4fb0462acaa30621fac02f5a534a", size = 3279245, upload-time = "2025-06-20T21:48:36.051Z" }, + { url = "https://files.pythonhosted.org/packages/f0/55/ef77a85ee443ae05a9e9cba1c9f0dd9241eb42da2aeba1dc50f51154c81a/hf_xet-1.1.5-cp37-abi3-win_amd64.whl", hash = "sha256:73e167d9807d166596b4b2f0b585c6d5bd84a26dea32843665a8b58f6edba245", size = 2738931, upload-time = "2025-06-20T21:48:39.482Z" }, ] [[package]] @@ -1378,7 +1391,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.32.5" +version = "0.33.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1390,9 +1403,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/f5/96/179f323578414b6fcc6aa254ecc4d27dc87ded90a271b147c634cf860784/huggingface_hub-0.32.5.tar.gz", hash = "sha256:8328f848218e3212647cec77eab9fdfc2590e8117d979b925439bc01042a20de", size = 425011, upload-time = "2025-06-10T16:03:12.527Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/8a/1362d565fefabaa4185cf3ae842a98dbc5b35146f5694f7080f043a6952f/huggingface_hub-0.33.0.tar.gz", hash = "sha256:aa31f70d29439d00ff7a33837c03f1f9dd83971ce4e29ad664d63ffb17d3bb97", size = 426179, upload-time = "2025-06-11T17:08:07.913Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/45/6e/36c929e105c9ff57a0a792780deaadf2c02f055f65ef96dc9cd597be9f23/huggingface_hub-0.32.5-py3-none-any.whl", hash = "sha256:6df8d5f42034a1b61daac60eed04acf348d337a4bd83aa448d4235cfb003e379", size = 512792, upload-time = "2025-06-10T16:03:10.106Z" }, + { url = "https://files.pythonhosted.org/packages/33/fb/53587a89fbc00799e4179796f51b3ad713c5de6bb680b2becb6d37c94649/huggingface_hub-0.33.0-py3-none-any.whl", hash = "sha256:e8668875b40c68f9929150d99727d39e5ebb8a05a98e4191b908dc7ded9074b3", size = 514799, upload-time = "2025-06-11T17:08:05.757Z" }, ] [package.optional-dependencies] @@ -1714,49 +1727,49 @@ wheels = [ [[package]] name = "libcst" -version = "1.8.1" +version = "1.8.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pyyaml", marker = "python_full_version < '3.13'" }, { name = "pyyaml-ft", marker = "python_full_version >= '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d6/10/b4e1bb190078865a6957dbe8e5347f33ad8da96e21e4a65cd49cc4d35965/libcst-1.8.1.tar.gz", hash = "sha256:423427819409a1d905017bbd51062bd0f1e4795c74c2f9f52a6b63dd67c282d2", size = 779211, upload-time = "2025-06-10T16:48:36.189Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/d3/aad6b7f828cc30954c67950bdcb737c9aef5bf832962a0231720ba3358ba/libcst-1.8.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:b45eb7b914c5a929d5501a88e94ea676dd26e43d47a232b5bafa7cebb7f3a5a5", size = 2185372, upload-time = "2025-06-10T16:47:28.588Z" }, - { url = "https://files.pythonhosted.org/packages/65/eb/4bf97e88b0b83c6618306668c11772b17093e982c008e8a70aa07d4172d0/libcst-1.8.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ff683bc50d543daca39b9db27af2406a1626fbadd9cc288605eff3915553e98", size = 2073096, upload-time = "2025-06-10T16:47:30.037Z" }, - { url = "https://files.pythonhosted.org/packages/05/3f/364cb2809d343bf4bd5dcadac986faeb4a527b999e21fb4ebefc36e0ca19/libcst-1.8.1-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:02388998631bf53590185518cb99a1bd9de3178e16354cee10e6ae00cc027919", size = 2401830, upload-time = "2025-06-10T16:47:32.152Z" }, - { url = "https://files.pythonhosted.org/packages/5b/fe/3c2c9c9c032c926ec974682557259654efe6200d88a3781fa89c565c659a/libcst-1.8.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f17c68a1d2d6ed44169244cc3b933b382cee709aac524c1b44cefe08205d09fc", size = 2220162, upload-time = "2025-06-10T16:47:33.673Z" }, - { url = "https://files.pythonhosted.org/packages/07/4b/6659aef66c311f45f30c313b354f41d469cb0ea135bd8a8ea4a40f613efa/libcst-1.8.1-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f7fb2d40918d1c7c0415b4384071d7f749e87459e78b40f61d2df33c4c9bf7d1", size = 2192329, upload-time = "2025-06-10T16:47:35.137Z" }, - { url = "https://files.pythonhosted.org/packages/85/42/6d64a23c36f391bc91cd9d5fc59e424100e99d72fa93215d73d1439a6a92/libcst-1.8.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:026ed27b3b9747990a5ed6c8d3aa0d49b119ba71ce53824faa407bec3bc4b167", size = 2312737, upload-time = "2025-06-10T16:47:36.586Z" }, - { url = "https://files.pythonhosted.org/packages/8c/b5/1ead67d976ff91f68ba79b66a710598616e946ee4384654def8d6fd30c96/libcst-1.8.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:e5a0deda3af3abb811b52043630a1ad0bc4b571b091478beae0bb13812632b0a", size = 2281881, upload-time = "2025-06-10T16:47:38.199Z" }, - { url = "https://files.pythonhosted.org/packages/af/d5/0507a56dcc09a0a8f2146a626270c42a4e66433ed53aa9eb5c48f1733995/libcst-1.8.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0041cd15f2f203a4fe1702217fcc639ac6f8b380d2b19c5a08cf2f229e3985c1", size = 2391445, upload-time = "2025-06-10T16:47:39.744Z" }, - { url = "https://files.pythonhosted.org/packages/d9/fc/b8e3de1419017a0947cabcf21ba4b9f57f61e756842e52328b545adff5a0/libcst-1.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e6c5ca26b83b74f6438c10af99124c9fce3f7af92a05d164fd5ebaaeaa610ad", size = 2095432, upload-time = "2025-06-10T16:47:41.364Z" }, - { url = "https://files.pythonhosted.org/packages/f4/7e/1bcd3c43c25ee8473a82b4f6ebb8856169df48daf065383c2d3821dd7fd0/libcst-1.8.1-cp312-cp312-win_arm64.whl", hash = "sha256:4de6f247dbeb34b1f701959fd210196bfd839798b8161b566343431bc9c4d5e1", size = 1984950, upload-time = "2025-06-10T16:47:42.951Z" }, - { url = "https://files.pythonhosted.org/packages/64/46/495aa1a73a528b675864de0fc00409cf90f23c827368f59c241e66f9cba4/libcst-1.8.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:260e277139f10c087b56b1c7d26fafa9c02c9e53ea993e8ef36b4da5b06dcaaf", size = 2185406, upload-time = "2025-06-10T16:47:44.482Z" }, - { url = "https://files.pythonhosted.org/packages/3d/a0/d41ff315720ad638a776f6c4f937255231a2479b63ddb8df28cd0a4b49aa/libcst-1.8.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a0c1fc051a223be5bc1537f2d8a8dfd6f073184362a2ffb2576127c0dd2bd8b8", size = 2073226, upload-time = "2025-06-10T16:47:46.155Z" }, - { url = "https://files.pythonhosted.org/packages/ac/5f/b55aa60ee503e9bfcc57e2a28d432e972612e8b26c5d1f0f3fcaba7cbc2c/libcst-1.8.1-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:484b734df8089ddc85c102482323693ff255cd5fd509703742bd3866a2c0254b", size = 2402332, 
upload-time = "2025-06-10T16:47:47.644Z" }, - { url = "https://files.pythonhosted.org/packages/6b/54/0660c228d55dd451ccaa74d2810e9053ea8401efd64d35b70d9436063326/libcst-1.8.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:69d4c34b254dfebe00e0bfdee787341a40f351450a451c37d7fa09c2e04dd45b", size = 2220796, upload-time = "2025-06-10T16:47:49.18Z" }, - { url = "https://files.pythonhosted.org/packages/bf/3f/cc31abb0c6c7ec91d9e4cfb32d99b485501e0698742b27d2474363fff9e5/libcst-1.8.1-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:61ddc308e70514d515500a08a25a3d6c877a8a9f5811862ce010c917550a9dfa", size = 2192230, upload-time = "2025-06-10T16:47:50.674Z" }, - { url = "https://files.pythonhosted.org/packages/b3/b4/ca3152cc01beb627d8ec3e56af9104531f85309e4a8fd8332feb7a0da828/libcst-1.8.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:381f2cda8daa7881700cabcb3689c16147e98964ae7be6b3a6e86a87a844f311", size = 2312833, upload-time = "2025-06-10T16:47:52.659Z" }, - { url = "https://files.pythonhosted.org/packages/c8/08/0675e9006990052e53e838376d794ef2a7d0ce8f7bcd51a3a45e9341b1aa/libcst-1.8.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2f985f311062712a2ebb8b87a261c46182286abfed7125f4f95588e715c13007", size = 2282214, upload-time = "2025-06-10T16:47:54.122Z" }, - { url = "https://files.pythonhosted.org/packages/c3/54/64898ae69ed267ff2ed2e8f1d583370374338cc8b302f8f18db48fee8320/libcst-1.8.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fd84d29376980b5b8b937bff9d79a08bc5ae1cf7f87218abe8b89837c0553fa0", size = 2391479, upload-time = "2025-06-10T16:47:55.617Z" }, - { url = "https://files.pythonhosted.org/packages/49/c3/8d011a3d8cb3c4b80ed8eef61643760b26b0f88ca494947836a3b918ee6f/libcst-1.8.1-cp313-cp313-win_amd64.whl", hash = "sha256:6ab9b6e583342ce9c59a098b51865f436e641650a1db5b6e9e6fa03ba8fa8801", size = 2095493, upload-time = "2025-06-10T16:47:57.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/b2/a16940d40182ca4b330be692ac0b40778d17d628cc0e23c9cb8e1a6d0d02/libcst-1.8.1-cp313-cp313-win_arm64.whl", hash = "sha256:5f9d5c0154b4a55aca556e0703d4f008b76dea6f574df3bae20533b5da6be05e", size = 1984529, upload-time = "2025-06-10T16:47:58.551Z" }, - { url = "https://files.pythonhosted.org/packages/db/45/30311b4bf8c3ff8980651704d0ff934f7f40d8785cc8df1eace42cade6a9/libcst-1.8.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e04f1dd7c5c513c6c880e402040848af300dc48a47101de6b3aaff86089f620f", size = 2175136, upload-time = "2025-06-10T16:48:00.07Z" }, - { url = "https://files.pythonhosted.org/packages/18/f1/04403f954d96bc3883abe7ff52aec2153ce425f3e00a9a7458a85c1dd5a3/libcst-1.8.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c7b16f11defcdf4583c8041574937c1a0608b18915d1b215d2eec2d8700cc96a", size = 2064011, upload-time = "2025-06-10T16:48:01.565Z" }, - { url = "https://files.pythonhosted.org/packages/52/8c/35240b0e121c3d678c88ee41c0f6e3bcde6751c1f64e15014957b8b55e84/libcst-1.8.1-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:de0b2cbd7823a246bd6dae08cff9cf9313f8445ee2dd8d57c0de50b08f655469", size = 2378441, upload-time = "2025-06-10T16:48:03.216Z" }, - { url = "https://files.pythonhosted.org/packages/5f/51/bd894b38a884a20d788e99a0c7e2f71a692c0114b4b1161b4b33329c6d80/libcst-1.8.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:357098cccd6bc7cbaf101096d4a51d96b8384f51dd017c614d99e15d7a00a452", size = 2208927, upload-time = "2025-06-10T16:48:05.083Z" }, - { url = "https://files.pythonhosted.org/packages/5e/f7/957a490e124eb54025c3b34a8e2a936851ae49f73047fbe8815212c697ea/libcst-1.8.1-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2e2573ac24030f359efbc79fad1e59fd7a1fd82682249b095ac7f4db53a92557", size = 2179715, upload-time = "2025-06-10T16:48:07.068Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/c0/fafae96e4e1e9f1c7c5e83d5117a3fc8adc47c3d94dd8b653fe3d6c81eb6/libcst-1.8.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ae4ba4e86576aef3af0be382f76eb92b8d9a5c5af228f48dac66bc670e3b1206", size = 2302728, upload-time = "2025-06-10T16:48:08.617Z" }, - { url = "https://files.pythonhosted.org/packages/cf/fa/60d99b357dd2ec1599849275da48917b083b7e6d99f593eb8356c256c38d/libcst-1.8.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:57a2b024830d9fe65ef17a3456efc75f70ddc5a76842a98ff45f3710b2f14aa8", size = 2271015, upload-time = "2025-06-10T16:48:10.11Z" }, - { url = "https://files.pythonhosted.org/packages/56/0c/bc39ca37536335162ba81294ef968994b2571b30992df0699218397e3ca3/libcst-1.8.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ce501720337f386ac14b8ad845dcd751ac1d090f9a43854fe321a4500014ed4e", size = 2382498, upload-time = "2025-06-10T16:48:11.608Z" }, - { url = "https://files.pythonhosted.org/packages/3b/d7/bd4f4ec17d3a17c34f8a3426ee57b43448e50b3ecaadbc58a237d8075f19/libcst-1.8.1-cp313-cp313t-win_amd64.whl", hash = "sha256:cceca837ec5e53d0c6e70be079362b896dfca69bdb732ff696b1bf768de7b4ca", size = 2084192, upload-time = "2025-06-10T16:48:13.175Z" }, - { url = "https://files.pythonhosted.org/packages/80/00/3e85ca329542af74112f1a10899a380191547eb8f0cfee94b574a904c93e/libcst-1.8.1-cp313-cp313t-win_arm64.whl", hash = "sha256:6b7a0abf9abe551cf5f5dd522985780a15c5d7d4bb3a16cefacc1694e8dded76", size = 1974194, upload-time = "2025-06-10T16:48:15.177Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/89/aa/b52d195b167958fe1bd106a260f64cc80ec384f6ac2a9cda874d8803df06/libcst-1.8.2.tar.gz", hash = "sha256:66e82cedba95a6176194a817be4232c720312f8be6d2c8f3847f3317d95a0c7f", size = 881534, upload-time = "2025-06-13T20:56:37.915Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/31/2d/8726bf8ea8252e8fd1e48980753eef5449622c5f6cf731102bc43dcdc2c6/libcst-1.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2e8c1dfa854e700fcf6cd79b2796aa37d55697a74646daf5ea47c7c764bac31c", size = 2185942, upload-time = "2025-06-13T20:55:26.105Z" }, + { url = "https://files.pythonhosted.org/packages/99/b3/565d24db8daed66eae7653c1fc1bc97793d49d5d3bcef530450ee8da882c/libcst-1.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b5c57a3c1976c365678eb0730bcb140d40510990cb77df9a91bb5c41d587ba6", size = 2072622, upload-time = "2025-06-13T20:55:27.548Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d6/5a433e8a58eeb5c5d46635cfe958d0605f598d87977d4560484e3662d438/libcst-1.8.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:0f23409add2aaebbb6d8e881babab43c2d979f051b8bd8aed5fe779ea180a4e8", size = 2402738, upload-time = "2025-06-13T20:55:29.539Z" }, + { url = "https://files.pythonhosted.org/packages/85/e4/0dd752c1880b570118fa91ac127589e6cf577ddcb2eef1aaf8b81ecc3f79/libcst-1.8.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b88e9104c456590ad0ef0e82851d4fc03e9aa9d621fa8fdd4cd0907152a825ae", size = 2219932, upload-time = "2025-06-13T20:55:31.17Z" }, + { url = "https://files.pythonhosted.org/packages/42/bc/fceae243c6a329477ac6d4edb887bcaa2ae7a3686158d8d9b9abb3089c37/libcst-1.8.2-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5ba3ea570c8fb6fc44f71aa329edc7c668e2909311913123d0d7ab8c65fc357", size = 2191891, upload-time = "2025-06-13T20:55:33.066Z" }, + { url = "https://files.pythonhosted.org/packages/7d/7d/eb341bdc11f1147e7edeccffd0f2f785eff014e72134f5e46067472012b0/libcst-1.8.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:460fcf3562f078781e1504983cb11909eb27a1d46eaa99e65c4b0fafdc298298", size = 2311927, upload-time = "2025-06-13T20:55:34.614Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/19/78bfc7aa5a542574d2ab0768210d084901dec5fc373103ca119905408cf2/libcst-1.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1381ddbd1066d543e05d580c15beacf671e1469a0b2adb6dba58fec311f4eed", size = 2281098, upload-time = "2025-06-13T20:55:36.089Z" }, + { url = "https://files.pythonhosted.org/packages/83/37/a41788a72dc06ed3566606f7cf50349c9918cee846eeae45d1bac03d54c2/libcst-1.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a70e40ce7600e1b32e293bb9157e9de3b69170e2318ccb219102f1abb826c94a", size = 2387649, upload-time = "2025-06-13T20:55:37.797Z" }, + { url = "https://files.pythonhosted.org/packages/bb/df/7a49576c9fd55cdfd8bcfb725273aa4ee7dc41e87609f3451a4901d68057/libcst-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:3ece08ba778b6eeea74d9c705e9af2d1b4e915e9bc6de67ad173b962e575fcc0", size = 2094574, upload-time = "2025-06-13T20:55:39.833Z" }, + { url = "https://files.pythonhosted.org/packages/29/60/27381e194d2af08bfd0fed090c905b2732907b69da48d97d86c056d70790/libcst-1.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:5efd1bf6ee5840d1b0b82ec8e0b9c64f182fa5a7c8aad680fbd918c4fa3826e0", size = 1984568, upload-time = "2025-06-13T20:55:41.511Z" }, + { url = "https://files.pythonhosted.org/packages/11/9c/e3d4c7f1eb5c23907f905f84a4da271b60cd15b746ac794d42ea18bb105e/libcst-1.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08e9dca4ab6f8551794ce7ec146f86def6a82da41750cbed2c07551345fa10d3", size = 2185848, upload-time = "2025-06-13T20:55:43.653Z" }, + { url = "https://files.pythonhosted.org/packages/59/e0/635cbb205d42fd296c01ab5cd1ba485b0aee92bffe061de587890c81f1bf/libcst-1.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8310521f2ccb79b5c4345750d475b88afa37bad930ab5554735f85ad5e3add30", size = 2072510, upload-time = "2025-06-13T20:55:45.287Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/45/8911cfe9413fd690a024a1ff2c8975f060dd721160178679d3f6a21f939e/libcst-1.8.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:da2d8b008aff72acd5a4a588491abdda1b446f17508e700f26df9be80d8442ae", size = 2403226, upload-time = "2025-06-13T20:55:46.927Z" }, + { url = "https://files.pythonhosted.org/packages/38/83/819d2b1b1fd870ad34ce4f34ec68704ca69bf48ef2d7665483115f267ec4/libcst-1.8.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be821d874ce8b26cbadd7277fa251a9b37f6d2326f8b5682b6fc8966b50a3a59", size = 2220669, upload-time = "2025-06-13T20:55:48.597Z" }, + { url = "https://files.pythonhosted.org/packages/d4/2f/2c4742bf834f88a9803095915c4f41cafefb7b04bde66ea86f74668b4b7b/libcst-1.8.2-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f74b0bc7378ad5afcf25ac9d0367b4dbba50f6f6468faa41f5dfddcf8bf9c0f8", size = 2191919, upload-time = "2025-06-13T20:55:50.092Z" }, + { url = "https://files.pythonhosted.org/packages/64/f4/107e13815f1ee5aad642d4eb4671c0273ee737f3832e3dbca9603b39f8d9/libcst-1.8.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b68ea4a6018abfea1f68d50f74de7d399172684c264eb09809023e2c8696fc23", size = 2311965, upload-time = "2025-06-13T20:55:51.974Z" }, + { url = "https://files.pythonhosted.org/packages/03/63/2948b6e4be367ad375d273a8ad00df573029cffe5ac8f6c09398c250de5b/libcst-1.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e264307ec49b2c72480422abafe80457f90b4e6e693b7ddf8a23d24b5c24001", size = 2281704, upload-time = "2025-06-13T20:55:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/c8/d3/590cde9c8c386d5f4f05fdef3394c437ea51060478a5141ff4a1f289e747/libcst-1.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5d5519962ce7c72d81888fb0c09e58e308ba4c376e76bcd853b48151063d6a8", size = 2387511, upload-time = "2025-06-13T20:55:55.538Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/3d/ba5e36c663028043fc607dc33e5c390c7f73136fb15a890fb3710ee9d158/libcst-1.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:b62aa11d6b74ed5545e58ac613d3f63095e5fd0254b3e0d1168fda991b9a6b41", size = 2094526, upload-time = "2025-06-13T20:55:57.486Z" }, + { url = "https://files.pythonhosted.org/packages/a5/34/530ca3b972dddad562f266c81190bea29376f8ba70054ea7b45b114504cd/libcst-1.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9c2bd4ac288a9cdb7ffc3229a9ce8027a66a3fd3f2ab9e13da60f5fbfe91f3b2", size = 1984627, upload-time = "2025-06-13T20:55:59.017Z" }, + { url = "https://files.pythonhosted.org/packages/19/9f/491f7b8d9d93444cd9bf711156ee1f122c38d25b903599e363d669acc8ab/libcst-1.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:08a8c7d9922ca6eed24e2c13a3c552b3c186af8fc78e5d4820b58487d780ec19", size = 2175415, upload-time = "2025-06-13T20:56:01.157Z" }, + { url = "https://files.pythonhosted.org/packages/2e/fe/4d13437f453f92687246aa7c5138e102ee5186fe96609ee4c598bb9f9ecb/libcst-1.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bba7c2b5063e8ada5a5477f9fa0c01710645426b5a8628ec50d558542a0a292e", size = 2063719, upload-time = "2025-06-13T20:56:02.787Z" }, + { url = "https://files.pythonhosted.org/packages/94/59/758ae142c6607f275269021362b731e0f22ff5c9aa7cc67b0ed3a6bc930f/libcst-1.8.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:d97c9fe13aacfbefded6861f5200dcb8e837da7391a9bdeb44ccb133705990af", size = 2380624, upload-time = "2025-06-13T20:56:04.909Z" }, + { url = "https://files.pythonhosted.org/packages/ac/c5/31d214a0bcb3523243a9b5643b597ff653d6ec9e1f3326cfcc16bcbf185d/libcst-1.8.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d2194ae959630aae4176a4b75bd320b3274c20bef2a5ca6b8d6fc96d3c608edf", size = 2208801, upload-time = "2025-06-13T20:56:06.983Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/16/a53f852322b266c63b492836a5c4968f192ee70fb52795a79feb4924e9ed/libcst-1.8.2-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0be639f5b2e1999a4b4a82a0f4633969f97336f052d0c131627983589af52f56", size = 2179557, upload-time = "2025-06-13T20:56:09.09Z" }, + { url = "https://files.pythonhosted.org/packages/fa/49/12a5664c73107187ba3af14869d3878fca1fd4c37f6fbb9adb943cb7a791/libcst-1.8.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6753e50904e05c27915933da41518ecd7a8ca4dd3602112ba44920c6e353a455", size = 2302499, upload-time = "2025-06-13T20:56:10.751Z" }, + { url = "https://files.pythonhosted.org/packages/e9/46/2d62552a9346a040c045d6619b645d59bb707a586318121f099abd0cd5c4/libcst-1.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:706d07106af91c343150be86caeae1ea3851b74aa0730fcbbf8cd089e817f818", size = 2271070, upload-time = "2025-06-13T20:56:12.445Z" }, + { url = "https://files.pythonhosted.org/packages/af/67/b625fd6ae22575255aade0a24f45e1d430b7e7279729c9c51d4faac982d2/libcst-1.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd4310ea8ddc49cc8872e083737cf806299b17f93159a1f354d59aa08993e876", size = 2380767, upload-time = "2025-06-13T20:56:13.995Z" }, + { url = "https://files.pythonhosted.org/packages/e6/84/fb88f2ffdb045ff7323a6c05dd3d243a9eb3cb3517a6269dee43fbfb9990/libcst-1.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:51bbafdd847529e8a16d1965814ed17831af61452ee31943c414cb23451de926", size = 2083403, upload-time = "2025-06-13T20:56:15.959Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8f/da755d6d517eb8ec9664afae967b00a9b8dd567bbbb350e261359c1b47fc/libcst-1.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:4f14f5045766646ed9e8826b959c6d07194788babed1e0ba08c94ea4f39517e3", size = 1974355, upload-time = "2025-06-13T20:56:18.064Z" }, ] [[package]] name = "lightning" -version = "2.5.1.post0" +version = "2.5.2" source = { registry = "https://pypi.org/simple" } 
dependencies = [ { name = "fsspec", extra = ["http"] }, @@ -1769,9 +1782,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/97/d0/fb3c5077efdd74c28ea3a277fd80bbf03738d866013a8637691138bfebca/lightning-2.5.1.post0.tar.gz", hash = "sha256:fda1ac63c283b3b08a54be8d905dd88469cf09e9845d36dd28b699e78911cbc8", size = 631113, upload-time = "2025-04-25T20:24:25.054Z" } +sdist = { url = "https://files.pythonhosted.org/packages/42/3c/6a930ac7c64fb896adbe560a9141570732d9ca890a11e6d158edd5aece76/lightning-2.5.2.tar.gz", hash = "sha256:9550df613cfb22358ebf77b4a8ad45f3767cd7d26ba2d52b7f036bd3cdd701c4", size = 633391, upload-time = "2025-06-20T15:58:22.065Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/1b/67201d693a575e8a086831710f33e697fab66166223f792e459ef2b84934/lightning-2.5.1.post0-py3-none-any.whl", hash = "sha256:a228a52ca52f0c5006ff327c92b8942f09e1aea3f2d9b0d7c8a209edd5b9e71d", size = 819001, upload-time = "2025-04-25T20:24:21.212Z" }, + { url = "https://files.pythonhosted.org/packages/71/a9/5d39280e55dc5df9e98be074029f6b48f86fe3db4929cb9ada6401234b47/lightning-2.5.2-py3-none-any.whl", hash = "sha256:7e7f23245e214c8ec14d5d8119d3856c25cfe96f9856296fd5df4e29c2ff88a7", size = 821145, upload-time = "2025-06-20T15:58:18.609Z" }, ] [[package]] @@ -1790,15 +1803,15 @@ wheels = [ [[package]] name = "llguidance" -version = "0.7.29" +version = "0.7.30" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fb/3b/c3ced46dd10cffa49fad941e84118a1e8279cd3261769b2238eafc4df3c1/llguidance-0.7.29.tar.gz", hash = "sha256:d1aa68a54f9496d36750018e7edad3bf624ee2fbcf671a7483883790d798c4fe", size = 1041807, upload-time = "2025-06-06T01:32:41.578Z" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/38/d1ef3ae08d8d857e5e0690c5b1e07bf7eb4a1cae5881d87215826dc6cadb/llguidance-0.7.30.tar.gz", hash = 
"sha256:e93bf75f2b6e48afb86a5cee23038746975e1654672bf5ba0ae75f7d4d4a2248", size = 1055528, upload-time = "2025-06-23T00:23:49.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/6e/dc3d372828ec5e90dfd7d9cf17edebc9fd2722b4a42d224d6ca068749843/llguidance-0.7.29-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:94a5ccbd86a70ae5e0a967c5d0e1ee6b0edf2d42f1023fdef0eca87f07ea9da4", size = 3279134, upload-time = "2025-06-06T01:32:39.677Z" }, - { url = "https://files.pythonhosted.org/packages/63/de/f8945358d163f27d1370106e543d81cc94a197d94e4a613a5da42e264466/llguidance-0.7.29-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:47cedfba78f0e8e0f377439c4f2ff3734e0e09c87be3934fe93bb8996f21a6b9", size = 3173892, upload-time = "2025-06-06T01:32:37.834Z" }, - { url = "https://files.pythonhosted.org/packages/4e/f4/91342f63620ed1c75518f1cf807fb1d67a789d6357bce5fbdb75bb13a94a/llguidance-0.7.29-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d30a76b30b646ac7f9025d262665f62bdbf2d43698115eeb1119c6ee062a36f", size = 14986589, upload-time = "2025-06-06T01:32:29.283Z" }, - { url = "https://files.pythonhosted.org/packages/9f/c8/59504a4e9ba60243261708f345c85af9d5b4c46220334b575f6f744c4622/llguidance-0.7.29-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17fd439957d6ca5f459d0dec755a2d040c2dc946ed7e3c332b469ef6861292f8", size = 15045686, upload-time = "2025-06-06T01:32:35.269Z" }, - { url = "https://files.pythonhosted.org/packages/9a/b6/aa9dd5ac6215efd1071ab2af9547e7b777743e3e8ed48a1074f458042769/llguidance-0.7.29-cp39-abi3-win_amd64.whl", hash = "sha256:83e175212effb655f7e19b4c642b8d013a42b8f17e0baaf869c607a2fc5438f9", size = 2746679, upload-time = "2025-06-06T01:32:43.489Z" }, + { url = "https://files.pythonhosted.org/packages/b3/e1/694c89986fcae7777184fc8b22baa0976eba15a6847221763f6ad211fc1f/llguidance-0.7.30-cp39-abi3-macosx_10_12_x86_64.whl", hash = 
"sha256:c80af02c118d2b0526bcecaab389af2ed094537a069b0fc724cd2a2f2ba3990f", size = 3327974, upload-time = "2025-06-23T00:23:47.556Z" }, + { url = "https://files.pythonhosted.org/packages/fd/77/ab7a548ae189dc23900fdd37803c115c2339b1223af9e8eb1f4329b5935a/llguidance-0.7.30-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:00a256d532911d2cf5ba4ef63e182944e767dd2402f38d63002016bc37755958", size = 3210709, upload-time = "2025-06-23T00:23:45.872Z" }, + { url = "https://files.pythonhosted.org/packages/9c/5b/6a166564b14f9f805f0ea01ec233a84f55789cb7eeffe1d6224ccd0e6cdd/llguidance-0.7.30-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af8741c867e4bc7e42f7cdc68350c076b4edd0ca10ecefbde75f15a9f6bc25d0", size = 14867038, upload-time = "2025-06-23T00:23:39.571Z" }, + { url = "https://files.pythonhosted.org/packages/af/80/5a40b9689f17612434b820854cba9b8cabd5142072c491b5280fe5f7a35e/llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9edc409b9decd6cffba5f5bf3b4fbd7541f95daa8cbc9510cbf96c6ab1ffc153", size = 15004926, upload-time = "2025-06-23T00:23:43.965Z" }, + { url = "https://files.pythonhosted.org/packages/99/47/58e49a118b514855b245f8a962c6aaf9a5cc95a0f61eac7e230e691c7b7e/llguidance-0.7.30-cp39-abi3-win_amd64.whl", hash = "sha256:05234ecceea7c9c6ff13b9739112043173a3bcb88cae860249b20335a07b3075", size = 2796878, upload-time = "2025-06-23T00:23:51Z" }, ] [[package]] @@ -1878,11 +1891,11 @@ wheels = [ [[package]] name = "markdown" -version = "3.8" +version = "3.8.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/15/222b423b0b88689c266d9eac4e61396fe2cc53464459d6a37618ac863b24/markdown-3.8.tar.gz", hash = "sha256:7df81e63f0df5c4b24b7d156eb81e4690595239b7d70937d0409f1b0de319c6f", size = 360906, upload-time = "2025-04-11T14:42:50.928Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071, upload-time = "2025-06-19T17:12:44.483Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/51/3f/afe76f8e2246ffbc867440cbcf90525264df0e658f8a5ca1f872b3f6192a/markdown-3.8-py3-none-any.whl", hash = "sha256:794a929b79c5af141ef5ab0f2f642d0f7b1872981250230e72682346f7cc90dc", size = 106210, upload-time = "2025-04-11T14:42:49.178Z" }, + { url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827, upload-time = "2025-06-19T17:12:42.994Z" }, ] [[package]] @@ -2049,7 +2062,7 @@ requires-dist = [ [[package]] name = "mistral-common" -version = "1.6.0" +version = "1.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema" }, @@ -2061,9 +2074,9 @@ dependencies = [ { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7f/b0/1158f5828a781c9659a043b3ac66fe7168384c8f78425f9400b60e5fcd90/mistral_common-1.6.0.tar.gz", hash = "sha256:7abde886f9346a395e017a12c8f943eb9832f14c554b4128dfd61e96866f5af5", size = 57185, upload-time = "2025-06-09T15:06:45.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/ce/b82f260858f8971634b61c4fead2def5ad658ed5ed1c2f3dcadf198816c5/mistral_common-1.6.2.tar.gz", hash = "sha256:273605f0969cfaf1297af44c05c071f271fa193d28d83c43a1d7bfe08239a56e", size = 6298853, upload-time = "2025-06-12T15:20:06.396Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/19/0f/6c0b8c0b26ef2d87655a8276f1910185e848a6c38d168e74fbb7dd7813cf/mistral_common-1.6.0-py3-none-any.whl", hash = 
"sha256:0c999c9adcd4b46fd6d5ba9b2cd8b4eab9f4a78719de9c3eff47055989bafc3f", size = 48874, upload-time = "2025-06-09T15:06:43.539Z" }, + { url = "https://files.pythonhosted.org/packages/4c/e8/4841d38a3a5e8a06a2903f553367951013c867a94b42adf67bcf2401d9fc/mistral_common-1.6.2-py3-none-any.whl", hash = "sha256:9fd2f54907374f1dbd7cdfa12c9ddabad8d7a39da2d9ebd15d80ae2d2dab5312", size = 6490291, upload-time = "2025-06-12T15:20:02.326Z" }, ] [package.optional-dependencies] @@ -2104,32 +2117,30 @@ wheels = [ [[package]] name = "msgpack" -version = "1.1.0" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cb/d0/7555686ae7ff5731205df1012ede15dd9d927f6227ea151e901c7406af4f/msgpack-1.1.0.tar.gz", hash = "sha256:dd432ccc2c72b914e4cb77afce64aab761c1137cc698be3984eee260bcb2896e", size = 167260, upload-time = "2024-09-10T04:25:52.197Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e1/d6/716b7ca1dbde63290d2973d22bbef1b5032ca634c3ff4384a958ec3f093a/msgpack-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d46cf9e3705ea9485687aa4001a76e44748b609d260af21c4ceea7f2212a501d", size = 152421, upload-time = "2024-09-10T04:25:49.63Z" }, - { url = "https://files.pythonhosted.org/packages/70/da/5312b067f6773429cec2f8f08b021c06af416bba340c912c2ec778539ed6/msgpack-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5dbad74103df937e1325cc4bfeaf57713be0b4f15e1c2da43ccdd836393e2ea2", size = 85277, upload-time = "2024-09-10T04:24:48.562Z" }, - { url = "https://files.pythonhosted.org/packages/28/51/da7f3ae4462e8bb98af0d5bdf2707f1b8c65a0d4f496e46b6afb06cbc286/msgpack-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:58dfc47f8b102da61e8949708b3eafc3504509a5728f8b4ddef84bd9e16ad420", size = 82222, upload-time = "2024-09-10T04:25:36.49Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/af/dc95c4b2a49cff17ce47611ca9ba218198806cad7796c0b01d1e332c86bb/msgpack-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4676e5be1b472909b2ee6356ff425ebedf5142427842aa06b4dfd5117d1ca8a2", size = 392971, upload-time = "2024-09-10T04:24:58.129Z" }, - { url = "https://files.pythonhosted.org/packages/f1/54/65af8de681fa8255402c80eda2a501ba467921d5a7a028c9c22a2c2eedb5/msgpack-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17fb65dd0bec285907f68b15734a993ad3fc94332b5bb21b0435846228de1f39", size = 401403, upload-time = "2024-09-10T04:25:40.428Z" }, - { url = "https://files.pythonhosted.org/packages/97/8c/e333690777bd33919ab7024269dc3c41c76ef5137b211d776fbb404bfead/msgpack-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a51abd48c6d8ac89e0cfd4fe177c61481aca2d5e7ba42044fd218cfd8ea9899f", size = 385356, upload-time = "2024-09-10T04:25:31.406Z" }, - { url = "https://files.pythonhosted.org/packages/57/52/406795ba478dc1c890559dd4e89280fa86506608a28ccf3a72fbf45df9f5/msgpack-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2137773500afa5494a61b1208619e3871f75f27b03bcfca7b3a7023284140247", size = 383028, upload-time = "2024-09-10T04:25:17.08Z" }, - { url = "https://files.pythonhosted.org/packages/e7/69/053b6549bf90a3acadcd8232eae03e2fefc87f066a5b9fbb37e2e608859f/msgpack-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:398b713459fea610861c8a7b62a6fec1882759f308ae0795b5413ff6a160cf3c", size = 391100, upload-time = "2024-09-10T04:25:08.993Z" }, - { url = "https://files.pythonhosted.org/packages/23/f0/d4101d4da054f04274995ddc4086c2715d9b93111eb9ed49686c0f7ccc8a/msgpack-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:06f5fd2f6bb2a7914922d935d3b8bb4a7fff3a9a91cfce6d06c13bc42bec975b", size = 394254, upload-time = "2024-09-10T04:25:06.048Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/12/cf07458f35d0d775ff3a2dc5559fa2e1fcd06c46f1ef510e594ebefdca01/msgpack-1.1.0-cp312-cp312-win32.whl", hash = "sha256:ad33e8400e4ec17ba782f7b9cf868977d867ed784a1f5f2ab46e7ba53b6e1e1b", size = 69085, upload-time = "2024-09-10T04:25:01.494Z" }, - { url = "https://files.pythonhosted.org/packages/73/80/2708a4641f7d553a63bc934a3eb7214806b5b39d200133ca7f7afb0a53e8/msgpack-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:115a7af8ee9e8cddc10f87636767857e7e3717b7a2e97379dc2054712693e90f", size = 75347, upload-time = "2024-09-10T04:25:33.106Z" }, - { url = "https://files.pythonhosted.org/packages/c8/b0/380f5f639543a4ac413e969109978feb1f3c66e931068f91ab6ab0f8be00/msgpack-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:071603e2f0771c45ad9bc65719291c568d4edf120b44eb36324dcb02a13bfddf", size = 151142, upload-time = "2024-09-10T04:24:59.656Z" }, - { url = "https://files.pythonhosted.org/packages/c8/ee/be57e9702400a6cb2606883d55b05784fada898dfc7fd12608ab1fdb054e/msgpack-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0f92a83b84e7c0749e3f12821949d79485971f087604178026085f60ce109330", size = 84523, upload-time = "2024-09-10T04:25:37.924Z" }, - { url = "https://files.pythonhosted.org/packages/7e/3a/2919f63acca3c119565449681ad08a2f84b2171ddfcff1dba6959db2cceb/msgpack-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a1964df7b81285d00a84da4e70cb1383f2e665e0f1f2a7027e683956d04b734", size = 81556, upload-time = "2024-09-10T04:24:28.296Z" }, - { url = "https://files.pythonhosted.org/packages/7c/43/a11113d9e5c1498c145a8925768ea2d5fce7cbab15c99cda655aa09947ed/msgpack-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59caf6a4ed0d164055ccff8fe31eddc0ebc07cf7326a2aaa0dbf7a4001cd823e", size = 392105, upload-time = "2024-09-10T04:25:20.153Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/7b/2c1d74ca6c94f70a1add74a8393a0138172207dc5de6fc6269483519d048/msgpack-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0907e1a7119b337971a689153665764adc34e89175f9a34793307d9def08e6ca", size = 399979, upload-time = "2024-09-10T04:25:41.75Z" }, - { url = "https://files.pythonhosted.org/packages/82/8c/cf64ae518c7b8efc763ca1f1348a96f0e37150061e777a8ea5430b413a74/msgpack-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65553c9b6da8166e819a6aa90ad15288599b340f91d18f60b2061f402b9a4915", size = 383816, upload-time = "2024-09-10T04:24:45.826Z" }, - { url = "https://files.pythonhosted.org/packages/69/86/a847ef7a0f5ef3fa94ae20f52a4cacf596a4e4a010197fbcc27744eb9a83/msgpack-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7a946a8992941fea80ed4beae6bff74ffd7ee129a90b4dd5cf9c476a30e9708d", size = 380973, upload-time = "2024-09-10T04:25:04.689Z" }, - { url = "https://files.pythonhosted.org/packages/aa/90/c74cf6e1126faa93185d3b830ee97246ecc4fe12cf9d2d31318ee4246994/msgpack-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4b51405e36e075193bc051315dbf29168d6141ae2500ba8cd80a522964e31434", size = 387435, upload-time = "2024-09-10T04:24:17.879Z" }, - { url = "https://files.pythonhosted.org/packages/7a/40/631c238f1f338eb09f4acb0f34ab5862c4e9d7eda11c1b685471a4c5ea37/msgpack-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4c01941fd2ff87c2a934ee6055bda4ed353a7846b8d4f341c428109e9fcde8c", size = 399082, upload-time = "2024-09-10T04:25:18.398Z" }, - { url = "https://files.pythonhosted.org/packages/e9/1b/fa8a952be252a1555ed39f97c06778e3aeb9123aa4cccc0fd2acd0b4e315/msgpack-1.1.0-cp313-cp313-win32.whl", hash = "sha256:7c9a35ce2c2573bada929e0b7b3576de647b0defbd25f5139dcdaba0ae35a4cc", size = 69037, upload-time = "2024-09-10T04:24:52.798Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/bc/8bd826dd03e022153bfa1766dcdec4976d6c818865ed54223d71f07862b3/msgpack-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:bce7d9e614a04d0883af0b3d4d501171fbfca038f12c77fa838d9f198147a23f", size = 75140, upload-time = "2024-09-10T04:24:31.288Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359, upload-time = "2025-06-13T06:52:03.909Z" }, + { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172, upload-time = "2025-06-13T06:52:05.246Z" }, + { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013, upload-time = "2025-06-13T06:52:06.341Z" }, + { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905, upload-time = "2025-06-13T06:52:07.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336, upload-time = "2025-06-13T06:52:09.047Z" }, + { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485, upload-time = "2025-06-13T06:52:10.382Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182, upload-time = "2025-06-13T06:52:11.644Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883, upload-time = "2025-06-13T06:52:12.806Z" }, + { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406, upload-time = "2025-06-13T06:52:14.271Z" }, + { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558, upload-time = "2025-06-13T06:52:15.252Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677, upload-time = "2025-06-13T06:52:16.64Z" }, + { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603, upload-time = "2025-06-13T06:52:17.843Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504, upload-time = "2025-06-13T06:52:18.982Z" }, + { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749, upload-time = "2025-06-13T06:52:20.211Z" }, + { url = "https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458, upload-time = "2025-06-13T06:52:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976, upload-time = "2025-06-13T06:52:22.995Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607, upload-time = "2025-06-13T06:52:24.152Z" }, + { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172, upload-time = "2025-06-13T06:52:25.704Z" }, + { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347, upload-time = "2025-06-13T06:52:26.846Z" }, + { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, ] [[package]] @@ -2156,62 +2167,59 @@ wheels = [ [[package]] name = "multidict" -version = "6.4.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/91/2f/a3470242707058fe856fe59241eee5635d79087100b7042a867368863a27/multidict-6.4.4.tar.gz", hash = "sha256:69ee9e6ba214b5245031b76233dd95408a0fd57fdb019ddcc1ead4790932a8e8", size = 90183, upload-time = "2025-05-19T14:16:37.381Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/b5/5675377da23d60875fe7dae6be841787755878e315e2f517235f22f59e18/multidict-6.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:dc388f75a1c00000824bf28b7633e40854f4127ede80512b44c3cfeeea1839a2", size = 64293, upload-time = "2025-05-19T14:14:44.724Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/a7/be384a482754bb8c95d2bbe91717bf7ccce6dc38c18569997a11f95aa554/multidict-6.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:98af87593a666f739d9dba5d0ae86e01b0e1a9cfcd2e30d2d361fbbbd1a9162d", size = 38096, upload-time = "2025-05-19T14:14:45.95Z" }, - { url = "https://files.pythonhosted.org/packages/66/6d/d59854bb4352306145bdfd1704d210731c1bb2c890bfee31fb7bbc1c4c7f/multidict-6.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aff4cafea2d120327d55eadd6b7f1136a8e5a0ecf6fb3b6863e8aca32cd8e50a", size = 37214, upload-time = "2025-05-19T14:14:47.158Z" }, - { url = "https://files.pythonhosted.org/packages/99/e0/c29d9d462d7cfc5fc8f9bf24f9c6843b40e953c0b55e04eba2ad2cf54fba/multidict-6.4.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:169c4ba7858176b797fe551d6e99040c531c775d2d57b31bcf4de6d7a669847f", size = 224686, upload-time = "2025-05-19T14:14:48.366Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4a/da99398d7fd8210d9de068f9a1b5f96dfaf67d51e3f2521f17cba4ee1012/multidict-6.4.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b9eb4c59c54421a32b3273d4239865cb14ead53a606db066d7130ac80cc8ec93", size = 231061, upload-time = "2025-05-19T14:14:49.952Z" }, - { url = "https://files.pythonhosted.org/packages/21/f5/ac11add39a0f447ac89353e6ca46666847051103649831c08a2800a14455/multidict-6.4.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cf3bd54c56aa16fdb40028d545eaa8d051402b61533c21e84046e05513d5780", size = 232412, upload-time = "2025-05-19T14:14:51.812Z" }, - { url = "https://files.pythonhosted.org/packages/d9/11/4b551e2110cded705a3c13a1d4b6a11f73891eb5a1c449f1b2b6259e58a6/multidict-6.4.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f682c42003c7264134bfe886376299db4cc0c6cd06a3295b41b347044bcb5482", size = 231563, upload-time = "2025-05-19T14:14:53.262Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/02/751530c19e78fe73b24c3da66618eda0aa0d7f6e7aa512e46483de6be210/multidict-6.4.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a920f9cf2abdf6e493c519492d892c362007f113c94da4c239ae88429835bad1", size = 223811, upload-time = "2025-05-19T14:14:55.232Z" }, - { url = "https://files.pythonhosted.org/packages/c7/cb/2be8a214643056289e51ca356026c7b2ce7225373e7a1f8c8715efee8988/multidict-6.4.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:530d86827a2df6504526106b4c104ba19044594f8722d3e87714e847c74a0275", size = 216524, upload-time = "2025-05-19T14:14:57.226Z" }, - { url = "https://files.pythonhosted.org/packages/19/f3/6d5011ec375c09081f5250af58de85f172bfcaafebff286d8089243c4bd4/multidict-6.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ecde56ea2439b96ed8a8d826b50c57364612ddac0438c39e473fafad7ae1c23b", size = 229012, upload-time = "2025-05-19T14:14:58.597Z" }, - { url = "https://files.pythonhosted.org/packages/67/9c/ca510785df5cf0eaf5b2a8132d7d04c1ce058dcf2c16233e596ce37a7f8e/multidict-6.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:dc8c9736d8574b560634775ac0def6bdc1661fc63fa27ffdfc7264c565bcb4f2", size = 226765, upload-time = "2025-05-19T14:15:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/36/c8/ca86019994e92a0f11e642bda31265854e6ea7b235642f0477e8c2e25c1f/multidict-6.4.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7f3d3b3c34867579ea47cbd6c1f2ce23fbfd20a273b6f9e3177e256584f1eacc", size = 222888, upload-time = "2025-05-19T14:15:01.568Z" }, - { url = "https://files.pythonhosted.org/packages/c6/67/bc25a8e8bd522935379066950ec4e2277f9b236162a73548a2576d4b9587/multidict-6.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:87a728af265e08f96b6318ebe3c0f68b9335131f461efab2fc64cc84a44aa6ed", size = 234041, upload-time = "2025-05-19T14:15:03.759Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/a0/70c4c2d12857fccbe607b334b7ee28b6b5326c322ca8f73ee54e70d76484/multidict-6.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9f193eeda1857f8e8d3079a4abd258f42ef4a4bc87388452ed1e1c4d2b0c8740", size = 231046, upload-time = "2025-05-19T14:15:05.698Z" }, - { url = "https://files.pythonhosted.org/packages/c1/0f/52954601d02d39742aab01d6b92f53c1dd38b2392248154c50797b4df7f1/multidict-6.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:be06e73c06415199200e9a2324a11252a3d62030319919cde5e6950ffeccf72e", size = 227106, upload-time = "2025-05-19T14:15:07.124Z" }, - { url = "https://files.pythonhosted.org/packages/af/24/679d83ec4379402d28721790dce818e5d6b9f94ce1323a556fb17fa9996c/multidict-6.4.4-cp312-cp312-win32.whl", hash = "sha256:622f26ea6a7e19b7c48dd9228071f571b2fbbd57a8cd71c061e848f281550e6b", size = 35351, upload-time = "2025-05-19T14:15:08.556Z" }, - { url = "https://files.pythonhosted.org/packages/52/ef/40d98bc5f986f61565f9b345f102409534e29da86a6454eb6b7c00225a13/multidict-6.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:5e2bcda30d5009996ff439e02a9f2b5c3d64a20151d34898c000a6281faa3781", size = 38791, upload-time = "2025-05-19T14:15:09.825Z" }, - { url = "https://files.pythonhosted.org/packages/df/2a/e166d2ffbf4b10131b2d5b0e458f7cee7d986661caceae0de8753042d4b2/multidict-6.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:82ffabefc8d84c2742ad19c37f02cde5ec2a1ee172d19944d380f920a340e4b9", size = 64123, upload-time = "2025-05-19T14:15:11.044Z" }, - { url = "https://files.pythonhosted.org/packages/8c/96/e200e379ae5b6f95cbae472e0199ea98913f03d8c9a709f42612a432932c/multidict-6.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6a2f58a66fe2c22615ad26156354005391e26a2f3721c3621504cd87c1ea87bf", size = 38049, upload-time = "2025-05-19T14:15:12.902Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/fb/47afd17b83f6a8c7fa863c6d23ac5ba6a0e6145ed8a6bcc8da20b2b2c1d2/multidict-6.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5883d6ee0fd9d8a48e9174df47540b7545909841ac82354c7ae4cbe9952603bd", size = 37078, upload-time = "2025-05-19T14:15:14.282Z" }, - { url = "https://files.pythonhosted.org/packages/fa/70/1af3143000eddfb19fd5ca5e78393985ed988ac493bb859800fe0914041f/multidict-6.4.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9abcf56a9511653fa1d052bfc55fbe53dbee8f34e68bd6a5a038731b0ca42d15", size = 224097, upload-time = "2025-05-19T14:15:15.566Z" }, - { url = "https://files.pythonhosted.org/packages/b1/39/d570c62b53d4fba844e0378ffbcd02ac25ca423d3235047013ba2f6f60f8/multidict-6.4.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6ed5ae5605d4ad5a049fad2a28bb7193400700ce2f4ae484ab702d1e3749c3f9", size = 230768, upload-time = "2025-05-19T14:15:17.308Z" }, - { url = "https://files.pythonhosted.org/packages/fd/f8/ed88f2c4d06f752b015933055eb291d9bc184936903752c66f68fb3c95a7/multidict-6.4.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bbfcb60396f9bcfa63e017a180c3105b8c123a63e9d1428a36544e7d37ca9e20", size = 231331, upload-time = "2025-05-19T14:15:18.73Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6f/8e07cffa32f483ab887b0d56bbd8747ac2c1acd00dc0af6fcf265f4a121e/multidict-6.4.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0f1987787f5f1e2076b59692352ab29a955b09ccc433c1f6b8e8e18666f608b", size = 230169, upload-time = "2025-05-19T14:15:20.179Z" }, - { url = "https://files.pythonhosted.org/packages/e6/2b/5dcf173be15e42f330110875a2668ddfc208afc4229097312212dc9c1236/multidict-6.4.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d0121ccce8c812047d8d43d691a1ad7641f72c4f730474878a5aeae1b8ead8c", size = 222947, upload-time = "2025-05-19T14:15:21.714Z" 
}, - { url = "https://files.pythonhosted.org/packages/39/75/4ddcbcebe5ebcd6faa770b629260d15840a5fc07ce8ad295a32e14993726/multidict-6.4.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83ec4967114295b8afd120a8eec579920c882831a3e4c3331d591a8e5bfbbc0f", size = 215761, upload-time = "2025-05-19T14:15:23.242Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c9/55e998ae45ff15c5608e384206aa71a11e1b7f48b64d166db400b14a3433/multidict-6.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:995f985e2e268deaf17867801b859a282e0448633f1310e3704b30616d269d69", size = 227605, upload-time = "2025-05-19T14:15:24.763Z" }, - { url = "https://files.pythonhosted.org/packages/04/49/c2404eac74497503c77071bd2e6f88c7e94092b8a07601536b8dbe99be50/multidict-6.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d832c608f94b9f92a0ec8b7e949be7792a642b6e535fcf32f3e28fab69eeb046", size = 226144, upload-time = "2025-05-19T14:15:26.249Z" }, - { url = "https://files.pythonhosted.org/packages/62/c5/0cd0c3c6f18864c40846aa2252cd69d308699cb163e1c0d989ca301684da/multidict-6.4.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d21c1212171cf7da703c5b0b7a0e85be23b720818aef502ad187d627316d5645", size = 221100, upload-time = "2025-05-19T14:15:28.303Z" }, - { url = "https://files.pythonhosted.org/packages/71/7b/f2f3887bea71739a046d601ef10e689528d4f911d84da873b6be9194ffea/multidict-6.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:cbebaa076aaecad3d4bb4c008ecc73b09274c952cf6a1b78ccfd689e51f5a5b0", size = 232731, upload-time = "2025-05-19T14:15:30.263Z" }, - { url = "https://files.pythonhosted.org/packages/e5/b3/d9de808349df97fa75ec1372758701b5800ebad3c46ae377ad63058fbcc6/multidict-6.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c93a6fb06cc8e5d3628b2b5fda215a5db01e8f08fc15fadd65662d9b857acbe4", size = 229637, upload-time = "2025-05-19T14:15:33.337Z" }, - { url = 
"https://files.pythonhosted.org/packages/5e/57/13207c16b615eb4f1745b44806a96026ef8e1b694008a58226c2d8f5f0a5/multidict-6.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8cd8f81f1310182362fb0c7898145ea9c9b08a71081c5963b40ee3e3cac589b1", size = 225594, upload-time = "2025-05-19T14:15:34.832Z" }, - { url = "https://files.pythonhosted.org/packages/3a/e4/d23bec2f70221604f5565000632c305fc8f25ba953e8ce2d8a18842b9841/multidict-6.4.4-cp313-cp313-win32.whl", hash = "sha256:3e9f1cd61a0ab857154205fb0b1f3d3ace88d27ebd1409ab7af5096e409614cd", size = 35359, upload-time = "2025-05-19T14:15:36.246Z" }, - { url = "https://files.pythonhosted.org/packages/a7/7a/cfe1a47632be861b627f46f642c1d031704cc1c0f5c0efbde2ad44aa34bd/multidict-6.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:8ffb40b74400e4455785c2fa37eba434269149ec525fc8329858c862e4b35373", size = 38903, upload-time = "2025-05-19T14:15:37.507Z" }, - { url = "https://files.pythonhosted.org/packages/68/7b/15c259b0ab49938a0a1c8f3188572802704a779ddb294edc1b2a72252e7c/multidict-6.4.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:6a602151dbf177be2450ef38966f4be3467d41a86c6a845070d12e17c858a156", size = 68895, upload-time = "2025-05-19T14:15:38.856Z" }, - { url = "https://files.pythonhosted.org/packages/f1/7d/168b5b822bccd88142e0a3ce985858fea612404edd228698f5af691020c9/multidict-6.4.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d2b9712211b860d123815a80b859075d86a4d54787e247d7fbee9db6832cf1c", size = 40183, upload-time = "2025-05-19T14:15:40.197Z" }, - { url = "https://files.pythonhosted.org/packages/e0/b7/d4b8d98eb850ef28a4922ba508c31d90715fd9b9da3801a30cea2967130b/multidict-6.4.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d2fa86af59f8fc1972e121ade052145f6da22758f6996a197d69bb52f8204e7e", size = 39592, upload-time = "2025-05-19T14:15:41.508Z" }, - { url = 
"https://files.pythonhosted.org/packages/18/28/a554678898a19583548e742080cf55d169733baf57efc48c2f0273a08583/multidict-6.4.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50855d03e9e4d66eab6947ba688ffb714616f985838077bc4b490e769e48da51", size = 226071, upload-time = "2025-05-19T14:15:42.877Z" }, - { url = "https://files.pythonhosted.org/packages/ee/dc/7ba6c789d05c310e294f85329efac1bf5b450338d2542498db1491a264df/multidict-6.4.4-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:5bce06b83be23225be1905dcdb6b789064fae92499fbc458f59a8c0e68718601", size = 222597, upload-time = "2025-05-19T14:15:44.412Z" }, - { url = "https://files.pythonhosted.org/packages/24/4f/34eadbbf401b03768dba439be0fb94b0d187facae9142821a3d5599ccb3b/multidict-6.4.4-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66ed0731f8e5dfd8369a883b6e564aca085fb9289aacabd9decd70568b9a30de", size = 228253, upload-time = "2025-05-19T14:15:46.474Z" }, - { url = "https://files.pythonhosted.org/packages/c0/e6/493225a3cdb0d8d80d43a94503fc313536a07dae54a3f030d279e629a2bc/multidict-6.4.4-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:329ae97fc2f56f44d91bc47fe0972b1f52d21c4b7a2ac97040da02577e2daca2", size = 226146, upload-time = "2025-05-19T14:15:48.003Z" }, - { url = "https://files.pythonhosted.org/packages/2f/70/e411a7254dc3bff6f7e6e004303b1b0591358e9f0b7c08639941e0de8bd6/multidict-6.4.4-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c27e5dcf520923d6474d98b96749e6805f7677e93aaaf62656005b8643f907ab", size = 220585, upload-time = "2025-05-19T14:15:49.546Z" }, - { url = "https://files.pythonhosted.org/packages/08/8f/beb3ae7406a619100d2b1fb0022c3bb55a8225ab53c5663648ba50dfcd56/multidict-6.4.4-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:058cc59b9e9b143cc56715e59e22941a5d868c322242278d28123a5d09cdf6b0", size = 212080, upload-time = "2025-05-19T14:15:51.151Z" }, - { url = "https://files.pythonhosted.org/packages/9c/ec/355124e9d3d01cf8edb072fd14947220f357e1c5bc79c88dff89297e9342/multidict-6.4.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:69133376bc9a03f8c47343d33f91f74a99c339e8b58cea90433d8e24bb298031", size = 226558, upload-time = "2025-05-19T14:15:52.665Z" }, - { url = "https://files.pythonhosted.org/packages/fd/22/d2b95cbebbc2ada3be3812ea9287dcc9712d7f1a012fad041770afddb2ad/multidict-6.4.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:d6b15c55721b1b115c5ba178c77104123745b1417527ad9641a4c5e2047450f0", size = 212168, upload-time = "2025-05-19T14:15:55.279Z" }, - { url = "https://files.pythonhosted.org/packages/4d/c5/62bfc0b2f9ce88326dbe7179f9824a939c6c7775b23b95de777267b9725c/multidict-6.4.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:a887b77f51d3d41e6e1a63cf3bc7ddf24de5939d9ff69441387dfefa58ac2e26", size = 217970, upload-time = "2025-05-19T14:15:56.806Z" }, - { url = "https://files.pythonhosted.org/packages/79/74/977cea1aadc43ff1c75d23bd5bc4768a8fac98c14e5878d6ee8d6bab743c/multidict-6.4.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:632a3bf8f1787f7ef7d3c2f68a7bde5be2f702906f8b5842ad6da9d974d0aab3", size = 226980, upload-time = "2025-05-19T14:15:58.313Z" }, - { url = "https://files.pythonhosted.org/packages/48/fc/cc4a1a2049df2eb84006607dc428ff237af38e0fcecfdb8a29ca47b1566c/multidict-6.4.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:a145c550900deb7540973c5cdb183b0d24bed6b80bf7bddf33ed8f569082535e", size = 220641, upload-time = "2025-05-19T14:15:59.866Z" }, - { url = "https://files.pythonhosted.org/packages/3b/6a/a7444d113ab918701988d4abdde373dbdfd2def7bd647207e2bf645c7eac/multidict-6.4.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cc5d83c6619ca5c9672cb78b39ed8542f1975a803dee2cda114ff73cbb076edd", size = 221728, upload-time = 
"2025-05-19T14:16:01.535Z" }, - { url = "https://files.pythonhosted.org/packages/2b/b0/fdf4c73ad1c55e0f4dbbf2aa59dd37037334091f9a4961646d2b7ac91a86/multidict-6.4.4-cp313-cp313t-win32.whl", hash = "sha256:3312f63261b9df49be9d57aaa6abf53a6ad96d93b24f9cc16cf979956355ce6e", size = 41913, upload-time = "2025-05-19T14:16:03.199Z" }, - { url = "https://files.pythonhosted.org/packages/8e/92/27989ecca97e542c0d01d05a98a5ae12198a243a9ee12563a0313291511f/multidict-6.4.4-cp313-cp313t-win_amd64.whl", hash = "sha256:ba852168d814b2c73333073e1c7116d9395bea69575a01b0b3c89d2d5a87c8fb", size = 46112, upload-time = "2025-05-19T14:16:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/84/5d/e17845bb0fa76334477d5de38654d27946d5b5d3695443987a094a71b440/multidict-6.4.4-py3-none-any.whl", hash = "sha256:bd4557071b561a8b3b6075c3ce93cf9bfb6182cb241805c3d66ced3b75eff4ac", size = 10481, upload-time = "2025-05-19T14:16:36.024Z" }, +version = "6.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/43/2d90c414d9efc4587d6e7cebae9f2c2d8001bcb4f89ed514ae837e9dcbe6/multidict-6.5.1.tar.gz", hash = "sha256:a835ea8103f4723915d7d621529c80ef48db48ae0c818afcabe0f95aa1febc3a", size = 98690, upload-time = "2025-06-24T22:16:05.117Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/36/225fb9b890607d740f61957febf622f5c9cd9e641a93502c7877934d57ef/multidict-6.5.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:48f95fe064f63d9601ef7a3dce2fc2a437d5fcc11bca960bc8be720330b13b6a", size = 74287, upload-time = "2025-06-24T22:14:29.456Z" }, + { url = "https://files.pythonhosted.org/packages/70/e5/c9eabb16ecf77275664413263527ab169e08371dfa6b168025d8f67261fd/multidict-6.5.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b7b6e1ce9b61f721417c68eeeb37599b769f3b631e6b25c21f50f8f619420b9", size = 44092, upload-time = "2025-06-24T22:14:30.686Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/0b/dd9322a432c477a2e6d089bbb53acb68ed25515b8292dbc60f27e7e45d70/multidict-6.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8b83b055889bda09fc866c0a652cdb6c36eeeafc2858259c9a7171fe82df5773", size = 42565, upload-time = "2025-06-24T22:14:31.8Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ac/22f5b4e55a4bc99f9622de280f7da366c1d7f29ec4eec9d339cb2ba62019/multidict-6.5.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7bd4d655dc460c7aebb73b58ed1c074e85f7286105b012556cf0f25c6d1dba3", size = 254896, upload-time = "2025-06-24T22:14:32.865Z" }, + { url = "https://files.pythonhosted.org/packages/09/dc/2f6d96d4a80ec731579cb69532fac33cbbda2a838079ae0c47c6e8f5545b/multidict-6.5.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aa6dcf25ced31cdce10f004506dbc26129f28a911b32ed10e54453a0842a6173", size = 236854, upload-time = "2025-06-24T22:14:34.185Z" }, + { url = "https://files.pythonhosted.org/packages/4a/cb/ef38a69ee75e8b72e5cff9ed4cff92379eadd057a99eaf4893494bf6ab64/multidict-6.5.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:059fb556c3e6ce1a168496f92ef139ad839a47f898eaa512b1d43e5e05d78c6b", size = 265131, upload-time = "2025-06-24T22:14:35.534Z" }, + { url = "https://files.pythonhosted.org/packages/c0/9e/85d9fe9e658e0edf566c02181248fa2aaf5e53134df0c80f7231ce5fc689/multidict-6.5.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f97680c839dd9fa208e9584b1c2a5f1224bd01d31961f7f7d94984408c4a6b9e", size = 262187, upload-time = "2025-06-24T22:14:36.891Z" }, + { url = "https://files.pythonhosted.org/packages/2b/1c/b46ec1dd78c3faa55bffb354410c48fadd81029a144cd056828c82ca15b4/multidict-6.5.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7710c716243525cc05cd038c6e09f1807ee0fef2510a6e484450712c389c8d7f", size = 251220, upload-time = "2025-06-24T22:14:38.584Z" }, + { url = "https://files.pythonhosted.org/packages/6b/6b/481ec5179ddc7da8b05077ebae2dd51da3df3ae3e5842020fbfa939167c1/multidict-6.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:83eb172b4856ffff2814bdcf9c7792c0439302faab1b31376817b067b26cd8f5", size = 249949, upload-time = "2025-06-24T22:14:40.033Z" }, + { url = "https://files.pythonhosted.org/packages/00/e3/642f63e12c1b8e6662c23626a98e9d764fe5a63c3a6cb59002f6fdcb920f/multidict-6.5.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:562d4714fa43f6ebc043a657535e4575e7d6141a818c9b3055f0868d29a1a41b", size = 244438, upload-time = "2025-06-24T22:14:41.464Z" }, + { url = "https://files.pythonhosted.org/packages/dc/cf/797397f6d38b011912504aef213a4be43ef4ec134859caa47f94d810bad8/multidict-6.5.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2d7def2fc47695c46a427b8f298fb5ace03d635c1fb17f30d6192c9a8fb69e70", size = 259921, upload-time = "2025-06-24T22:14:43.248Z" }, + { url = "https://files.pythonhosted.org/packages/82/b2/ae914a2d84eba21e956fa3727060248ca23ed4a5bf1beb057df0d10f9de3/multidict-6.5.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:77bc8ab5c6bfe696eff564824e73a451fdeca22f3b960261750836cee02bcbfa", size = 252691, upload-time = "2025-06-24T22:14:45.57Z" }, + { url = "https://files.pythonhosted.org/packages/01/fa/1ab4d79a236b871cfd40d36a1f9942906c630bd2b7822287bd3927addb62/multidict-6.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9eec51891d3c210948ead894ec1483d48748abec08db5ce9af52cc13fef37aee", size = 246224, upload-time = "2025-06-24T22:14:47.316Z" }, + { url = "https://files.pythonhosted.org/packages/78/dd/bf002fe04e952db73cad8ce10a5b5347358d0d17221aef156e050aff690b/multidict-6.5.1-cp312-cp312-win32.whl", hash = "sha256:189f0c2bd1c0ae5509e453707d0e187e030c9e873a0116d1f32d1c870d0fc347", size = 41354, upload-time = "2025-06-24T22:14:48.567Z" 
}, + { url = "https://files.pythonhosted.org/packages/95/ce/508a8487d98fdc3e693755bc19c543a2af293f5ce96da398bd1974efb802/multidict-6.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:e81f23b4b6f2a588f15d5cb554b2d8b482bb6044223d64b86bc7079cae9ebaad", size = 45072, upload-time = "2025-06-24T22:14:50.898Z" }, + { url = "https://files.pythonhosted.org/packages/ae/da/4782cf2f274d0d56fff6c07fc5cc5a14acf821dec08350c17d66d0207a05/multidict-6.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:79d13e06d5241f9c8479dfeaf0f7cce8f453a4a302c9a0b1fa9b1a6869ff7757", size = 42149, upload-time = "2025-06-24T22:14:53.138Z" }, + { url = "https://files.pythonhosted.org/packages/19/3f/c2e07031111d2513d260157933a8697ad52a935d8a2a2b8b7b317ddd9a96/multidict-6.5.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:98011312f36d1e496f15454a95578d1212bc2ffc25650a8484752b06d304fd9b", size = 73588, upload-time = "2025-06-24T22:14:54.332Z" }, + { url = "https://files.pythonhosted.org/packages/95/bb/f47aa21827202a9f889fd66de9a1db33d0e4bbaaa2567156e4efb3cc0e5e/multidict-6.5.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bae589fb902b47bd94e6f539b34eefe55a1736099f616f614ec1544a43f95b05", size = 43756, upload-time = "2025-06-24T22:14:55.748Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ec/24549de092c9b0bc3167e0beb31a11be58e8595dbcfed2b7821795bb3923/multidict-6.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6eb3bf26cd94eb306e4bc776d0964cc67a7967e4ad9299309f0ff5beec3c62be", size = 42222, upload-time = "2025-06-24T22:14:57.418Z" }, + { url = "https://files.pythonhosted.org/packages/13/45/54452027ebc0ba660667aab67ae11afb9aaba91f4b5d63cddef045279d94/multidict-6.5.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e5e1a5a99c72d1531501406fcc06b6bf699ebd079dacd6807bb43fc0ff260e5c", size = 253014, upload-time = "2025-06-24T22:14:58.738Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/3c/76e7b4c0ce3a8bb43efca679674fba421333fbc8429134072db80e13dcb8/multidict-6.5.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:38755bcba18720cb2338bea23a5afcff234445ee75fa11518f6130e22f2ab970", size = 235939, upload-time = "2025-06-24T22:15:00.138Z" }, + { url = "https://files.pythonhosted.org/packages/86/ce/48e3123a9af61ff2f60e3764b0b15cf4fca22b1299aac281252ac3a590d6/multidict-6.5.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f42fef9bcba3c32fd4e4a23c5757fc807d218b249573aaffa8634879f95feb73", size = 262940, upload-time = "2025-06-24T22:15:01.52Z" }, + { url = "https://files.pythonhosted.org/packages/b3/ab/bccd739faf87051b55df619a0967c8545b4d4a4b90258c5f564ab1752f15/multidict-6.5.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:071b962f4cc87469cda90c7cc1c077b76496878b39851d7417a3d994e27fe2c6", size = 260652, upload-time = "2025-06-24T22:15:02.988Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9c/01f654aad28a5d0d74f2678c1541ae15e711f99603fd84c780078205966e/multidict-6.5.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:627ba4b7ce7c0115981f0fd91921f5d101dfb9972622178aeef84ccce1c2bbf3", size = 250011, upload-time = "2025-06-24T22:15:04.317Z" }, + { url = "https://files.pythonhosted.org/packages/5c/bc/edf08906e1db7385c6bf36e4179957307f50c44a889493e9b251255be79c/multidict-6.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:05dcaed3e5e54f0d0f99a39762b0195274b75016cbf246f600900305581cf1a2", size = 248242, upload-time = "2025-06-24T22:15:06.035Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c3/1ad054b88b889fda8b62ea9634ac7082567e8dc42b9b794a2c565ef102ab/multidict-6.5.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:11f5ecf3e741a18c578d118ad257c5588ca33cc7c46d51c0487d7ae76f072c32", size = 
244683, upload-time = "2025-06-24T22:15:07.731Z" }, + { url = "https://files.pythonhosted.org/packages/57/63/119a76b2095e1bb765816175cafeac7b520f564691abef2572fb80f4f246/multidict-6.5.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b948eb625411c20b15088fca862c51a39140b9cf7875b5fb47a72bb249fa2f42", size = 257626, upload-time = "2025-06-24T22:15:09.013Z" }, + { url = "https://files.pythonhosted.org/packages/26/a9/b91a76af5ff49bd088ee76d11eb6134227f5ea50bcd5f6738443b2fe8e05/multidict-6.5.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fc993a96dfc8300befd03d03df46efdb1d8d5a46911b014e956a4443035f470d", size = 251077, upload-time = "2025-06-24T22:15:10.366Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fe/b1dc57aaa4de9f5a27543e28bd1f8bff00a316888b7344b5d33258b14b0a/multidict-6.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2d333380f22d35a56c6461f4579cfe186e143cd0b010b9524ac027de2a34cd", size = 244715, upload-time = "2025-06-24T22:15:11.76Z" }, + { url = "https://files.pythonhosted.org/packages/51/55/47a82690f71d0141eea49a623bbcc00a4d28770efc7cba8ead75602c9b90/multidict-6.5.1-cp313-cp313-win32.whl", hash = "sha256:5891e3327e6a426ddd443c87339b967c84feb8c022dd425e0c025fa0fcd71e68", size = 41156, upload-time = "2025-06-24T22:15:13.139Z" }, + { url = "https://files.pythonhosted.org/packages/25/b3/43306e4d7d3a9898574d1dc156b9607540dad581b1d767c992030751b82d/multidict-6.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:fcdaa72261bff25fad93e7cb9bd7112bd4bac209148e698e380426489d8ed8a9", size = 44933, upload-time = "2025-06-24T22:15:14.639Z" }, + { url = "https://files.pythonhosted.org/packages/30/e2/34cb83c8a4e01b28e2abf30dc90178aa63c9db042be22fa02472cb744b86/multidict-6.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:84292145303f354a35558e601c665cdf87059d87b12777417e2e57ba3eb98903", size = 41967, upload-time = "2025-06-24T22:15:15.856Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/08/17d2de9cf749ea9589ecfb7532ab4988e8b113b7624826dba6b7527a58f3/multidict-6.5.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f8316e58db799a1972afbc46770dfaaf20b0847003ab80de6fcb9861194faa3f", size = 80513, upload-time = "2025-06-24T22:15:16.946Z" }, + { url = "https://files.pythonhosted.org/packages/3e/b9/c9392465a21f7dff164633348b4cf66eef55c4ee48bdcdc00f0a71792779/multidict-6.5.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3468f0db187aca59eb56e0aa9f7c8c5427bcb844ad1c86557b4886aeb4484d8", size = 46854, upload-time = "2025-06-24T22:15:18.116Z" }, + { url = "https://files.pythonhosted.org/packages/2e/24/d79cbed5d0573304bc907dff0e5ad8788a4de891eec832809812b319930e/multidict-6.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:228533a5f99f1248cd79f6470779c424d63bc3e10d47c82511c65cc294458445", size = 45724, upload-time = "2025-06-24T22:15:19.241Z" }, + { url = "https://files.pythonhosted.org/packages/ec/22/232be6c077183719c78131f0e3c3d7134eb2d839e6e50e1c1e69e5ef5965/multidict-6.5.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527076fdf5854901b1246c589af9a8a18b4a308375acb0020b585f696a10c794", size = 251895, upload-time = "2025-06-24T22:15:20.564Z" }, + { url = "https://files.pythonhosted.org/packages/57/80/85985e1441864b946e79538355b7b47f36206bf6bbaa2fa6d74d8232f2ab/multidict-6.5.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9a17a17bad5c22f43e6a6b285dd9c16b1e8f8428202cd9bc22adaac68d0bbfed", size = 229357, upload-time = "2025-06-24T22:15:21.949Z" }, + { url = "https://files.pythonhosted.org/packages/b1/14/0024d1428b05aedaeea211da232aa6b6ad5c556a8a38b0942df1e54e1fa5/multidict-6.5.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:efd1951edab4a6cb65108d411867811f2b283f4b972337fb4269e40142f7f6a6", size = 259262, upload-time = 
"2025-06-24T22:15:23.455Z" }, + { url = "https://files.pythonhosted.org/packages/b1/cc/3fe63d61ffc9a48d62f36249e228e330144d990ac01f61169b615a3be471/multidict-6.5.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c07d5f38b39acb4f8f61a7aa4166d140ed628245ff0441630df15340532e3b3c", size = 257998, upload-time = "2025-06-24T22:15:24.907Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e4/46b38b9a565ccc5d86f55787090670582d51ab0a0d37cfeaf4313b053f7b/multidict-6.5.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a6605dc74cd333be279e1fcb568ea24f7bdf1cf09f83a77360ce4dd32d67f14", size = 247951, upload-time = "2025-06-24T22:15:26.274Z" }, + { url = "https://files.pythonhosted.org/packages/af/78/58a9bc0674401f1f26418cd58a5ebf35ce91ead76a22b578908acfe0f4e2/multidict-6.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d64e30ae9ba66ce303a567548a06d64455d97c5dff7052fe428d154274d7174", size = 246786, upload-time = "2025-06-24T22:15:27.695Z" }, + { url = "https://files.pythonhosted.org/packages/66/24/51142ccee295992e22881cccc54b291308423bbcc836fcf4d2edef1a88d0/multidict-6.5.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2fb5dde79a7f6d98ac5e26a4c9de77ccd2c5224a7ce89aeac6d99df7bbe06464", size = 235030, upload-time = "2025-06-24T22:15:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/4b/9a/a6f7b75460d3e35b16bf7745c9e3ebb3293324a4295e586563bf50d361f4/multidict-6.5.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8a0d22e8b07cf620e9aeb1582340d00f0031e6a1f3e39d9c2dcbefa8691443b4", size = 253964, upload-time = "2025-06-24T22:15:31.689Z" }, + { url = "https://files.pythonhosted.org/packages/3d/f8/0b690674bf8f78604eb0a2b0a85d1380ff3003f270440d40def2a3de8cf4/multidict-6.5.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0120ed5cff2082c7a0ed62a8f80f4f6ac266010c722381816462f279bfa19487", size = 247370, upload-time = 
"2025-06-24T22:15:33.114Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7d/ca55049d1041c517f294c1755c786539cb7a8dc5033361f20ce3a3d817be/multidict-6.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3dea06ba27401c4b54317aa04791182dc9295e7aa623732dd459071a0e0f65db", size = 242920, upload-time = "2025-06-24T22:15:34.669Z" }, + { url = "https://files.pythonhosted.org/packages/1e/65/f4afa14f0921751864bb3ef80267f15ecae423483e8da9bc5d3757632bfa/multidict-6.5.1-cp313-cp313t-win32.whl", hash = "sha256:93b21be44f3cfee3be68ed5cd8848a3c0420d76dbd12d74f7776bde6b29e5f33", size = 46968, upload-time = "2025-06-24T22:15:36.023Z" }, + { url = "https://files.pythonhosted.org/packages/00/0a/13d08be1ca1523df515fb4efd3cf10f153e62d533f55c53f543cd73041e8/multidict-6.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c5c18f8646a520cc34d00f65f9f6f77782b8a8c59fd8de10713e0de7f470b5d0", size = 52353, upload-time = "2025-06-24T22:15:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/dd/84aaf725b236677597a9570d8c1c99af0ba03712149852347969e014d826/multidict-6.5.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eb27128141474a1d545f0531b496c7c2f1c4beff50cb5a828f36eb62fef16c67", size = 44500, upload-time = "2025-06-24T22:15:38.445Z" }, + { url = "https://files.pythonhosted.org/packages/07/9f/d4719ce55a1d8bf6619e8bb92f1e2e7399026ea85ae0c324ec77ee06c050/multidict-6.5.1-py3-none-any.whl", hash = "sha256:895354f4a38f53a1df2cc3fa2223fa714cff2b079a9f018a76cad35e7f0f044c", size = 12185, upload-time = "2025-06-24T22:16:03.816Z" }, ] [[package]] @@ -2232,28 +2240,28 @@ wheels = [ [[package]] name = "mypy" -version = "1.16.0" +version = "1.16.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, { name = "pathspec" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d4/38/13c2f1abae94d5ea0354e146b95a1be9b2137a0d506728e0da037c4276f6/mypy-1.16.0.tar.gz", hash = 
"sha256:84b94283f817e2aa6350a14b4a8fb2a35a53c286f97c9d30f53b63620e7af8ab", size = 3323139, upload-time = "2025-05-29T13:46:12.532Z" } +sdist = { url = "https://files.pythonhosted.org/packages/81/69/92c7fa98112e4d9eb075a239caa4ef4649ad7d441545ccffbd5e34607cbb/mypy-1.16.1.tar.gz", hash = "sha256:6bd00a0a2094841c5e47e7374bb42b83d64c527a502e3334e1173a0c24437bab", size = 3324747, upload-time = "2025-06-16T16:51:35.145Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/cf/158e5055e60ca2be23aec54a3010f89dcffd788732634b344fc9cb1e85a0/mypy-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c5436d11e89a3ad16ce8afe752f0f373ae9620841c50883dc96f8b8805620b13", size = 11062927, upload-time = "2025-05-29T13:35:52.328Z" }, - { url = "https://files.pythonhosted.org/packages/94/34/cfff7a56be1609f5d10ef386342ce3494158e4d506516890142007e6472c/mypy-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f2622af30bf01d8fc36466231bdd203d120d7a599a6d88fb22bdcb9dbff84090", size = 10083082, upload-time = "2025-05-29T13:35:33.378Z" }, - { url = "https://files.pythonhosted.org/packages/b3/7f/7242062ec6288c33d8ad89574df87c3903d394870e5e6ba1699317a65075/mypy-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d045d33c284e10a038f5e29faca055b90eee87da3fc63b8889085744ebabb5a1", size = 11828306, upload-time = "2025-05-29T13:21:02.164Z" }, - { url = "https://files.pythonhosted.org/packages/6f/5f/b392f7b4f659f5b619ce5994c5c43caab3d80df2296ae54fa888b3d17f5a/mypy-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4968f14f44c62e2ec4a038c8797a87315be8df7740dc3ee8d3bfe1c6bf5dba8", size = 12702764, upload-time = "2025-05-29T13:20:42.826Z" }, - { url = "https://files.pythonhosted.org/packages/9b/c0/7646ef3a00fa39ac9bc0938626d9ff29d19d733011be929cfea59d82d136/mypy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:eb14a4a871bb8efb1e4a50360d4e3c8d6c601e7a31028a2c79f9bb659b63d730", size = 12896233, upload-time = "2025-05-29T13:18:37.446Z" }, - { url = "https://files.pythonhosted.org/packages/6d/38/52f4b808b3fef7f0ef840ee8ff6ce5b5d77381e65425758d515cdd4f5bb5/mypy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:bd4e1ebe126152a7bbaa4daedd781c90c8f9643c79b9748caa270ad542f12bec", size = 9565547, upload-time = "2025-05-29T13:20:02.836Z" }, - { url = "https://files.pythonhosted.org/packages/97/9c/ca03bdbefbaa03b264b9318a98950a9c683e06472226b55472f96ebbc53d/mypy-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a9e056237c89f1587a3be1a3a70a06a698d25e2479b9a2f57325ddaaffc3567b", size = 11059753, upload-time = "2025-05-29T13:18:18.167Z" }, - { url = "https://files.pythonhosted.org/packages/36/92/79a969b8302cfe316027c88f7dc6fee70129490a370b3f6eb11d777749d0/mypy-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b07e107affb9ee6ce1f342c07f51552d126c32cd62955f59a7db94a51ad12c0", size = 10073338, upload-time = "2025-05-29T13:19:48.079Z" }, - { url = "https://files.pythonhosted.org/packages/14/9b/a943f09319167da0552d5cd722104096a9c99270719b1afeea60d11610aa/mypy-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fb60cbd85dc65d4d63d37cb5c86f4e3a301ec605f606ae3a9173e5cf34997b", size = 11827764, upload-time = "2025-05-29T13:46:04.47Z" }, - { url = "https://files.pythonhosted.org/packages/ec/64/ff75e71c65a0cb6ee737287c7913ea155845a556c64144c65b811afdb9c7/mypy-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7e32297a437cc915599e0578fa6bc68ae6a8dc059c9e009c628e1c47f91495d", size = 12701356, upload-time = "2025-05-29T13:35:13.553Z" }, - { url = "https://files.pythonhosted.org/packages/0a/ad/0e93c18987a1182c350f7a5fab70550852f9fabe30ecb63bfbe51b602074/mypy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:afe420c9380ccec31e744e8baff0d406c846683681025db3531b32db56962d52", size = 12900745, upload-time = "2025-05-29T13:17:24.409Z" }, - { url = "https://files.pythonhosted.org/packages/28/5d/036c278d7a013e97e33f08c047fe5583ab4f1fc47c9a49f985f1cdd2a2d7/mypy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:55f9076c6ce55dd3f8cd0c6fff26a008ca8e5131b89d5ba6d86bd3f47e736eeb", size = 9572200, upload-time = "2025-05-29T13:33:44.92Z" }, - { url = "https://files.pythonhosted.org/packages/99/a3/6ed10530dec8e0fdc890d81361260c9ef1f5e5c217ad8c9b21ecb2b8366b/mypy-1.16.0-py3-none-any.whl", hash = "sha256:29e1499864a3888bca5c1542f2d7232c6e586295183320caa95758fc84034031", size = 2265773, upload-time = "2025-05-29T13:35:18.762Z" }, + { url = "https://files.pythonhosted.org/packages/b4/d6/39482e5fcc724c15bf6280ff5806548c7185e0c090712a3736ed4d07e8b7/mypy-1.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:af4792433f09575d9eeca5c63d7d90ca4aeceda9d8355e136f80f8967639183d", size = 11066493, upload-time = "2025-06-16T16:47:01.683Z" }, + { url = "https://files.pythonhosted.org/packages/e6/e5/26c347890efc6b757f4d5bb83f4a0cf5958b8cf49c938ac99b8b72b420a6/mypy-1.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:66df38405fd8466ce3517eda1f6640611a0b8e70895e2a9462d1d4323c5eb4b9", size = 10081687, upload-time = "2025-06-16T16:48:19.367Z" }, + { url = "https://files.pythonhosted.org/packages/44/c7/b5cb264c97b86914487d6a24bd8688c0172e37ec0f43e93b9691cae9468b/mypy-1.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:44e7acddb3c48bd2713994d098729494117803616e116032af192871aed80b79", size = 11839723, upload-time = "2025-06-16T16:49:20.912Z" }, + { url = "https://files.pythonhosted.org/packages/15/f8/491997a9b8a554204f834ed4816bda813aefda31cf873bb099deee3c9a99/mypy-1.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0ab5eca37b50188163fa7c1b73c685ac66c4e9bdee4a85c9adac0e91d8895e15", 
size = 12722980, upload-time = "2025-06-16T16:37:40.929Z" }, + { url = "https://files.pythonhosted.org/packages/df/f0/2bd41e174b5fd93bc9de9a28e4fb673113633b8a7f3a607fa4a73595e468/mypy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb6229b2c9086247e21a83c309754b9058b438704ad2f6807f0d8227f6ebdd", size = 12903328, upload-time = "2025-06-16T16:34:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/61/81/5572108a7bec2c46b8aff7e9b524f371fe6ab5efb534d38d6b37b5490da8/mypy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:1f0435cf920e287ff68af3d10a118a73f212deb2ce087619eb4e648116d1fe9b", size = 9562321, upload-time = "2025-06-16T16:48:58.823Z" }, + { url = "https://files.pythonhosted.org/packages/28/e3/96964af4a75a949e67df4b95318fe2b7427ac8189bbc3ef28f92a1c5bc56/mypy-1.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ddc91eb318c8751c69ddb200a5937f1232ee8efb4e64e9f4bc475a33719de438", size = 11063480, upload-time = "2025-06-16T16:47:56.205Z" }, + { url = "https://files.pythonhosted.org/packages/f5/4d/cd1a42b8e5be278fab7010fb289d9307a63e07153f0ae1510a3d7b703193/mypy-1.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:87ff2c13d58bdc4bbe7dc0dedfe622c0f04e2cb2a492269f3b418df2de05c536", size = 10090538, upload-time = "2025-06-16T16:46:43.92Z" }, + { url = "https://files.pythonhosted.org/packages/c9/4f/c3c6b4b66374b5f68bab07c8cabd63a049ff69796b844bc759a0ca99bb2a/mypy-1.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a7cfb0fe29fe5a9841b7c8ee6dffb52382c45acdf68f032145b75620acfbd6f", size = 11836839, upload-time = "2025-06-16T16:36:28.039Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7e/81ca3b074021ad9775e5cb97ebe0089c0f13684b066a750b7dc208438403/mypy-1.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:051e1677689c9d9578b9c7f4d206d763f9bbd95723cd1416fad50db49d52f359", size = 12715634, upload-time = 
"2025-06-16T16:50:34.441Z" }, + { url = "https://files.pythonhosted.org/packages/e9/95/bdd40c8be346fa4c70edb4081d727a54d0a05382d84966869738cfa8a497/mypy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d5d2309511cc56c021b4b4e462907c2b12f669b2dbeb68300110ec27723971be", size = 12895584, upload-time = "2025-06-16T16:34:54.857Z" }, + { url = "https://files.pythonhosted.org/packages/5a/fd/d486a0827a1c597b3b48b1bdef47228a6e9ee8102ab8c28f944cb83b65dc/mypy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:4f58ac32771341e38a853c5d0ec0dfe27e18e27da9cdb8bbc882d2249c71a3ee", size = 9573886, upload-time = "2025-06-16T16:36:43.589Z" }, + { url = "https://files.pythonhosted.org/packages/cf/d3/53e684e78e07c1a2bf7105715e5edd09ce951fc3f47cf9ed095ec1b7a037/mypy-1.16.1-py3-none-any.whl", hash = "sha256:5fc2ac4027d0ef28d6ba69a0343737a23c4d1b83672bf38d1fe237bdc0643b37", size = 2265923, upload-time = "2025-06-16T16:48:02.366Z" }, ] [[package]] @@ -2284,11 +2292,11 @@ wheels = [ [[package]] name = "narwhals" -version = "1.41.0" +version = "1.44.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/32/fc/7b9a3689911662be59889b1b0b40e17d5dba6f98080994d86ca1f3154d41/narwhals-1.41.0.tar.gz", hash = "sha256:0ab2e5a1757a19b071e37ca74b53b0b5426789321d68939738337dfddea629b5", size = 488446, upload-time = "2025-05-26T12:46:07.43Z" } +sdist = { url = "https://files.pythonhosted.org/packages/56/e5/0b875d29e2a4d112c58fef6aac2ed3a73bbdd4d8d0dce722fd154357248a/narwhals-1.44.0.tar.gz", hash = "sha256:8cf0616d4f6f21225b3b56fcde96ccab6d05023561a0f162402aa9b8c33ad31d", size = 499250, upload-time = "2025-06-23T08:28:08.653Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/e0/ade8619846645461c012498f02b93a659e50f07d9d9a6ffefdf5ea2c02a0/narwhals-1.41.0-py3-none-any.whl", hash = "sha256:d958336b40952e4c4b7aeef259a7074851da0800cf902186a58f2faeff97be02", size = 357968, upload-time = "2025-05-26T12:46:05.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/fb/12f4a971467aac3cb7cbccbbfca5d0f05e23722068112c1ac4a393613ebe/narwhals-1.44.0-py3-none-any.whl", hash = "sha256:a170ea0bab4cf1f323d9f8bf17f2d7042c3d73802bea321996b39bf075d57de5", size = 365240, upload-time = "2025-06-23T08:28:06.314Z" }, ] [[package]] @@ -2303,6 +2311,7 @@ dependencies = [ { name = "hydra-core" }, { name = "math-verify" }, { name = "matplotlib" }, + { name = "ninja" }, { name = "numpy" }, { name = "nvidia-ml-py" }, { name = "nvtx" }, @@ -2321,12 +2330,17 @@ dependencies = [ ] [package.optional-dependencies] +automodel = [ + { name = "flash-attn" }, +] mcore = [ + { name = "flash-attn" }, { name = "megatron-core" }, { name = "nemo-tron" }, { name = "transformer-engine", extra = ["pytorch"] }, ] vllm = [ + { name = "flash-attn" }, { name = "vllm" }, ] @@ -2335,6 +2349,7 @@ build = [ { name = "einops" }, { name = "hatchling" }, { name = "packaging" }, + { name = "psutil" }, { name = "pybind11" }, { name = "setuptools" }, { name = "torch" }, @@ -2368,11 +2383,15 @@ requires-dist = [ { name = "colored", specifier = "==2.2.3" }, { name = "datasets", specifier = ">=3.6.0" }, { name = "debugpy" }, + { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.7.4.post1" }, + { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.7.4.post1" }, + { name = "flash-attn", marker = "extra == 'vllm'", specifier = "==2.7.4.post1" }, { name = "hydra-core" }, { name = "math-verify" }, { name = "matplotlib" }, { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, { name = "nemo-tron", marker = "extra == 'mcore'", editable = "3rdparty/NeMo-workspace" }, + { name = "ninja" }, { name = "numpy" }, { name = "nvidia-ml-py" }, { name = "nvtx" }, @@ -2391,13 +2410,14 @@ requires-dist = [ { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.9.0" }, { name = "wandb" }, ] -provides-extras = ["vllm", "mcore"] +provides-extras = ["automodel", "vllm", 
"mcore"] [package.metadata.requires-dev] build = [ { name = "einops" }, { name = "hatchling" }, { name = "packaging" }, + { name = "psutil" }, { name = "pybind11" }, { name = "setuptools" }, { name = "torch", specifier = "==2.7.0", index = "https://download.pytorch.org/whl/cu128" }, @@ -2670,7 +2690,7 @@ name = "nvidia-cudnn-cu12" version = "9.7.1.26" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/25/dc/dc825c4b1c83b538e207e34f48f86063c88deaa35d46c651c7c181364ba2/nvidia_cudnn_cu12-9.7.1.26-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:6d011159a158f3cfc47bf851aea79e31bcff60d530b70ef70474c84cac484d07", size = 726851421, upload-time = "2025-02-06T22:18:29.812Z" }, @@ -2681,7 +2701,7 @@ name = "nvidia-cufft-cu12" version = "11.3.3.41" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/ac/26/b53c493c38dccb1f1a42e1a21dc12cba2a77fbe36c652f7726d9ec4aba28/nvidia_cufft_cu12-11.3.3.41-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:da650080ab79fcdf7a4b06aa1b460e99860646b176a43f6208099bdc17836b6a", size = 193118795, upload-time = "2025-01-23T17:56:30.536Z" }, @@ -2708,9 +2728,9 @@ name = "nvidia-cusolver-cu12" version 
= "11.7.2.55" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-cublas-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/c2/08/953675873a136d96bb12f93b49ba045d1107bc94d2551c52b12fa6c7dec3/nvidia_cusolver_cu12-11.7.2.55-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4d1354102f1e922cee9db51920dba9e2559877cf6ff5ad03a00d853adafb191b", size = 260373342, upload-time = "2025-01-23T17:58:56.406Z" }, @@ -2721,7 +2741,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.7.53" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, ] wheels = [ { url = 
"https://files.pythonhosted.org/packages/c2/ab/31e8149c66213b846c082a3b41b1365b831f41191f9f40c6ddbc8a7d550e/nvidia_cusparse_cu12-12.5.7.53-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c1b61eb8c85257ea07e9354606b26397612627fdcd327bfd91ccf6155e7c86d", size = 292064180, upload-time = "2025-01-23T18:00:23.233Z" }, @@ -2875,7 +2895,7 @@ wheels = [ [[package]] name = "openai" -version = "1.86.0" +version = "1.91.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2887,9 +2907,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/7a/9ad4a61f1502f0e59d8c27fb629e28a63259a44d8d31cd2314e1534a2d9f/openai-1.86.0.tar.gz", hash = "sha256:c64d5b788359a8fdf69bd605ae804ce41c1ce2e78b8dd93e2542e0ee267f1e4b", size = 468272, upload-time = "2025-06-10T16:50:32.962Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/a22f2973b729eff3f1f429017bdf717930c5de0fbf9e14017bae330e4e7a/openai-1.91.0.tar.gz", hash = "sha256:d6b07730d2f7c6745d0991997c16f85cddfc90ddcde8d569c862c30716b9fc90", size = 472529, upload-time = "2025-06-23T18:27:10.961Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/58/c1/dfb16b3432810fc9758564f9d1a4dbce6b93b7fb763ba57530c7fc48316d/openai-1.86.0-py3-none-any.whl", hash = "sha256:c8889c39410621fe955c230cc4c21bfe36ec887f4e60a957de05f507d7e1f349", size = 730296, upload-time = "2025-06-10T16:50:30.495Z" }, + { url = "https://files.pythonhosted.org/packages/7a/d2/f99bdd6fc737d6b3cf0df895508d621fc9a386b375a1230ee81d46c5436e/openai-1.91.0-py3-none-any.whl", hash = "sha256:207f87aa3bc49365e014fac2f7e291b99929f4fe126c4654143440e0ad446a5f", size = 735837, upload-time = "2025-06-23T18:27:08.913Z" }, ] [[package]] @@ -3102,11 +3122,11 @@ wheels = [ [[package]] name = "packaging" -version = "24.2" +version = "25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/d0/63/68dbb6eb2de9cb10ee4c9c14a0148804425e13c4fb20d61cce69f53106da/packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f", size = 163950, upload-time = "2024-11-08T09:47:47.202Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451, upload-time = "2024-11-08T09:47:44.722Z" }, + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] [[package]] @@ -3159,11 +3179,11 @@ wheels = [ [[package]] name = "partial-json-parser" -version = "0.2.1.1.post5" +version = "0.2.1.1.post6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/27/9c/9c366aed65acb40a97842ce1375a87b27ea37d735fc9717f7729bae3cc00/partial_json_parser-0.2.1.1.post5.tar.gz", hash = "sha256:992710ac67e90b367921d52727698928040f7713ba7ecb33b96371ea7aec82ca", size = 10313, upload-time = "2025-01-08T15:44:02.147Z" } +sdist = { url = "https://files.pythonhosted.org/packages/86/13/459e86c9c67a006651803a3df3d0b08f7708bc5483fdc482582d75562949/partial_json_parser-0.2.1.1.post6.tar.gz", hash = "sha256:43896b68929678224cbbe4884a6a5fe9251ded4b30b8b7d7eb569e5feea93afc", size = 10299, upload-time = "2025-06-23T17:51:45.372Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8c/ee/a9476f01f27c74420601be208c6c2c0dd3486681d515e9d765931b89851c/partial_json_parser-0.2.1.1.post5-py3-none-any.whl", hash = "sha256:627715aaa3cb3fb60a65b0d62223243acaa6c70846520a90326fef3a2f0b61ca", size = 10885, upload-time = "2025-01-08T15:44:00.987Z" }, + { url = "https://files.pythonhosted.org/packages/cb/40/1f922794af3dc7503f19319a8804b398a161a2cd54183cff8b12225b8d85/partial_json_parser-0.2.1.1.post6-py3-none-any.whl", hash = "sha256:abc332f09b13ef5233384dbfe7128a0e9ea3fa4b8f8be9b37ac1b433c810e99e", size = 10876, upload-time = "2025-06-23T17:51:44.332Z" }, ] [[package]] @@ -3552,7 +3572,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.11.5" +version = "2.11.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -3560,9 +3580,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f0/86/8ce9040065e8f924d642c58e4a344e33163a07f6b57f836d0d734e0ad3fb/pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a", size = 787102, upload-time = "2025-05-22T21:18:08.761Z" } +sdist = { url = "https://files.pythonhosted.org/packages/00/dd/4325abf92c39ba8623b5af936ddb36ffcfe0beae70405d456ab1fb2f5b8c/pydantic-2.11.7.tar.gz", hash = "sha256:d989c3c6cb79469287b1569f7447a17848c998458d49ebe294e975b9baf0f0db", size = 788350, upload-time = "2025-06-14T08:33:17.137Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/69/831ed22b38ff9b4b64b66569f0e5b7b97cf3638346eb95a2147fdb49ad5f/pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7", size = 444229, upload-time = "2025-05-22T21:18:06.329Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = 
"sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" }, ] [[package]] @@ -3627,11 +3647,11 @@ wheels = [ [[package]] name = "pygments" -version = "2.19.1" +version = "2.19.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] [[package]] @@ -3678,7 +3698,7 @@ wheels = [ [[package]] name = "pytest" -version = "8.4.0" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -3687,9 +3707,9 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/aa/405082ce2749be5398045152251ac69c0f3578c7077efc53431303af97ce/pytest-8.4.0.tar.gz", hash = 
"sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6", size = 1515232, upload-time = "2025-06-02T17:36:30.03Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/de/afa024cbe022b1b318a3d224125aa24939e99b4ff6f22e0ba639a2eaee47/pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e", size = 363797, upload-time = "2025-06-02T17:36:27.859Z" }, + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] [[package]] @@ -3706,15 +3726,16 @@ wheels = [ [[package]] name = "pytest-cov" -version = "6.1.1" +version = "6.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage" }, + { name = "pluggy" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/25/69/5f1e57f6c5a39f81411b550027bf72842c4567ff5fd572bed1edc9e4b5d9/pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a", size = 66857, upload-time = "2025-04-05T14:07:51.592Z" } +sdist = { url = "https://files.pythonhosted.org/packages/18/99/668cade231f434aaa59bbfbf49469068d2ddd945000621d3d165d2e7dd7b/pytest_cov-6.2.1.tar.gz", hash = "sha256:25cc6cc0a5358204b8108ecedc51a9b57b34cc6b8c967cc2c01a4e00d8a67da2", size = 69432, upload-time = "2025-06-12T10:47:47.684Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/28/d0/def53b4a790cfb21483016430ed828f64830dd981ebe1089971cd10cab25/pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde", size = 23841, upload-time = "2025-04-05T14:07:49.641Z" }, + { url = "https://files.pythonhosted.org/packages/bc/16/4ea354101abb1287856baa4af2732be351c7bee728065aed451b678153fd/pytest_cov-6.2.1-py3-none-any.whl", hash = "sha256:f5bc4c23f42f1cdd23c70b1dab1bbaef4fc505ba950d53e0081d0730dd7e86d5", size = 24644, upload-time = "2025-06-12T10:47:45.932Z" }, ] [[package]] @@ -3731,14 +3752,14 @@ wheels = [ [[package]] name = "pytest-random-order" -version = "1.1.1" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/e5/89654b4354b10e89969a74130f391b017dbdc113ce27f0e8ff9fa23e44e1/pytest-random-order-1.1.1.tar.gz", hash = "sha256:4472d7d34f1f1c5f3a359c4ffc5c13ed065232f31eca19c8844c1ab406e79080", size = 14626, upload-time = "2024-01-20T09:25:07.218Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/ad/a2a32d91effe0f84a300b9ba6c9d000bd52f31b77f8e49d8fd8653a9ddc3/pytest_random_order-1.2.0.tar.gz", hash = "sha256:12b2d4ee977ec9922b5e3575afe13c22cbdb06e3d03e550abc43df137b90439a", size = 107304, upload-time = "2025-06-22T14:44:43.807Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/02/944cf846bcd6027a1805c69fec90581f916e99ccafcbe409ae6c76833255/pytest_random_order-1.1.1-py3-none-any.whl", hash = "sha256:882727a8b597ecd06ede28654ffeb8a6d511a1e4abe1054cca7982f2e42008cd", size = 11521, upload-time = "2024-01-20T09:25:05.098Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7f/92c8dbe185aa38270fec1e73e0ed70d8e5de31963aa057ba621055f8b008/pytest_random_order-1.2.0-py3-none-any.whl", hash = "sha256:78d1d6f346222cdf26a7302c502d2f1cab19454529af960b8b9e1427a99ab277", size = 10889, upload-time = 
"2025-06-22T14:44:42.438Z" }, ] [[package]] @@ -3767,11 +3788,11 @@ wheels = [ [[package]] name = "python-dotenv" -version = "1.1.0" +version = "1.1.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920, upload-time = "2025-03-25T10:14:56.835Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/b0/4bc07ccd3572a2f9df7e6782f52b0c6c90dcbb803ac4a167702d7d0dfe1e/python_dotenv-1.1.1.tar.gz", hash = "sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab", size = 41978, upload-time = "2025-06-24T04:21:07.341Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256, upload-time = "2025-03-25T10:14:55.034Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, ] [[package]] @@ -3794,7 +3815,7 @@ wheels = [ [[package]] name = "pytorch-lightning" -version = "2.5.1.post0" +version = "2.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fsspec", extra = ["http"] }, @@ -3806,9 +3827,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/0c/cfa6223c525f67ea3a7a2907e36e9e9a9653300f82cfd9af88f8136514ab/pytorch_lightning-2.5.1.post0.tar.gz", hash = "sha256:abc3d5a804d41f941b14e3fd7db5572a1270cd1e9889b50e962984c87d498d94", size = 634368, upload-time = 
"2025-04-25T20:24:29.272Z" } +sdist = { url = "https://files.pythonhosted.org/packages/01/3e/728fbdc671d07727ad447f9401d98a43570573965beb3cb2060f9a330b4f/pytorch_lightning-2.5.2.tar.gz", hash = "sha256:f817087d611be8d43b777dd4e543d72703e235510936677a13e6c29f7fd790e3", size = 636859, upload-time = "2025-06-20T15:58:27.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/a9/e14821cfaf08e8d78185cca0477c9d3a62bafe1b4b530100f7b66bb1f7bb/pytorch_lightning-2.5.1.post0-py3-none-any.whl", hash = "sha256:873fb21392c8b79908218f5ca8f65bd835439216e52550c36ff55d849e99c93e", size = 823084, upload-time = "2025-04-25T20:24:27.421Z" }, + { url = "https://files.pythonhosted.org/packages/e2/42/47c186c8f9e956e559c89e6c764d5d5d0d0af517c04ca0ad39bd0a357d3a/pytorch_lightning-2.5.2-py3-none-any.whl", hash = "sha256:17cfdf89bd98074e389101f097cdf34c486a1f5c6d3fdcefbaf4dea7f97ff0bf", size = 825366, upload-time = "2025-06-20T15:58:25.534Z" }, ] [[package]] @@ -3885,43 +3906,32 @@ wheels = [ [[package]] name = "pyzmq" -version = "26.4.0" +version = "27.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "implementation_name == 'pypy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/11/b9213d25230ac18a71b39b3723494e57adebe36e066397b961657b3b41c1/pyzmq-26.4.0.tar.gz", hash = "sha256:4bd13f85f80962f91a651a7356fe0472791a5f7a92f227822b5acf44795c626d", size = 278293, upload-time = "2025-04-04T12:05:44.049Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/44/a778555ebfdf6c7fc00816aad12d185d10a74d975800341b1bc36bad1187/pyzmq-26.4.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:5227cb8da4b6f68acfd48d20c588197fd67745c278827d5238c707daf579227b", size = 1341586, upload-time = "2025-04-04T12:03:41.954Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/4f/f3a58dc69ac757e5103be3bd41fb78721a5e17da7cc617ddb56d973a365c/pyzmq-26.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1c07a7fa7f7ba86554a2b1bef198c9fed570c08ee062fd2fd6a4dcacd45f905", size = 665880, upload-time = "2025-04-04T12:03:43.45Z" }, - { url = "https://files.pythonhosted.org/packages/fe/45/50230bcfb3ae5cb98bee683b6edeba1919f2565d7cc1851d3c38e2260795/pyzmq-26.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae775fa83f52f52de73183f7ef5395186f7105d5ed65b1ae65ba27cb1260de2b", size = 902216, upload-time = "2025-04-04T12:03:45.572Z" }, - { url = "https://files.pythonhosted.org/packages/41/59/56bbdc5689be5e13727491ad2ba5efd7cd564365750514f9bc8f212eef82/pyzmq-26.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66c760d0226ebd52f1e6b644a9e839b5db1e107a23f2fcd46ec0569a4fdd4e63", size = 859814, upload-time = "2025-04-04T12:03:47.188Z" }, - { url = "https://files.pythonhosted.org/packages/81/b1/57db58cfc8af592ce94f40649bd1804369c05b2190e4cbc0a2dad572baeb/pyzmq-26.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ef8c6ecc1d520debc147173eaa3765d53f06cd8dbe7bd377064cdbc53ab456f5", size = 855889, upload-time = "2025-04-04T12:03:49.223Z" }, - { url = "https://files.pythonhosted.org/packages/e8/92/47542e629cbac8f221c230a6d0f38dd3d9cff9f6f589ed45fdf572ffd726/pyzmq-26.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3150ef4084e163dec29ae667b10d96aad309b668fac6810c9e8c27cf543d6e0b", size = 1197153, upload-time = "2025-04-04T12:03:50.591Z" }, - { url = "https://files.pythonhosted.org/packages/07/e5/b10a979d1d565d54410afc87499b16c96b4a181af46e7645ab4831b1088c/pyzmq-26.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4448c9e55bf8329fa1dcedd32f661bf611214fa70c8e02fee4347bc589d39a84", size = 1507352, upload-time = "2025-04-04T12:03:52.473Z" }, - { url = 
"https://files.pythonhosted.org/packages/ab/58/5a23db84507ab9c01c04b1232a7a763be66e992aa2e66498521bbbc72a71/pyzmq-26.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e07dde3647afb084d985310d067a3efa6efad0621ee10826f2cb2f9a31b89d2f", size = 1406834, upload-time = "2025-04-04T12:03:54Z" }, - { url = "https://files.pythonhosted.org/packages/22/74/aaa837b331580c13b79ac39396601fb361454ee184ca85e8861914769b99/pyzmq-26.4.0-cp312-cp312-win32.whl", hash = "sha256:ba034a32ecf9af72adfa5ee383ad0fd4f4e38cdb62b13624278ef768fe5b5b44", size = 577992, upload-time = "2025-04-04T12:03:55.815Z" }, - { url = "https://files.pythonhosted.org/packages/30/0f/55f8c02c182856743b82dde46b2dc3e314edda7f1098c12a8227eeda0833/pyzmq-26.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:056a97aab4064f526ecb32f4343917a4022a5d9efb6b9df990ff72e1879e40be", size = 640466, upload-time = "2025-04-04T12:03:57.231Z" }, - { url = "https://files.pythonhosted.org/packages/e4/29/073779afc3ef6f830b8de95026ef20b2d1ec22d0324d767748d806e57379/pyzmq-26.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:2f23c750e485ce1eb639dbd576d27d168595908aa2d60b149e2d9e34c9df40e0", size = 556342, upload-time = "2025-04-04T12:03:59.218Z" }, - { url = "https://files.pythonhosted.org/packages/d7/20/fb2c92542488db70f833b92893769a569458311a76474bda89dc4264bd18/pyzmq-26.4.0-cp313-cp313-macosx_10_15_universal2.whl", hash = "sha256:c43fac689880f5174d6fc864857d1247fe5cfa22b09ed058a344ca92bf5301e3", size = 1339484, upload-time = "2025-04-04T12:04:00.671Z" }, - { url = "https://files.pythonhosted.org/packages/58/29/2f06b9cabda3a6ea2c10f43e67ded3e47fc25c54822e2506dfb8325155d4/pyzmq-26.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:902aca7eba477657c5fb81c808318460328758e8367ecdd1964b6330c73cae43", size = 666106, upload-time = "2025-04-04T12:04:02.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/e4/dcf62bd29e5e190bd21bfccaa4f3386e01bf40d948c239239c2f1e726729/pyzmq-26.4.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5e48a830bfd152fe17fbdeaf99ac5271aa4122521bf0d275b6b24e52ef35eb6", size = 902056, upload-time = "2025-04-04T12:04:03.919Z" }, - { url = "https://files.pythonhosted.org/packages/1a/cf/b36b3d7aea236087d20189bec1a87eeb2b66009731d7055e5c65f845cdba/pyzmq-26.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31be2b6de98c824c06f5574331f805707c667dc8f60cb18580b7de078479891e", size = 860148, upload-time = "2025-04-04T12:04:05.581Z" }, - { url = "https://files.pythonhosted.org/packages/18/a6/f048826bc87528c208e90604c3bf573801e54bd91e390cbd2dfa860e82dc/pyzmq-26.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:6332452034be001bbf3206ac59c0d2a7713de5f25bb38b06519fc6967b7cf771", size = 855983, upload-time = "2025-04-04T12:04:07.096Z" }, - { url = "https://files.pythonhosted.org/packages/0a/27/454d34ab6a1d9772a36add22f17f6b85baf7c16e14325fa29e7202ca8ee8/pyzmq-26.4.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:da8c0f5dd352136853e6a09b1b986ee5278dfddfebd30515e16eae425c872b30", size = 1197274, upload-time = "2025-04-04T12:04:08.523Z" }, - { url = "https://files.pythonhosted.org/packages/f4/3d/7abfeab6b83ad38aa34cbd57c6fc29752c391e3954fd12848bd8d2ec0df6/pyzmq-26.4.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f4ccc1a0a2c9806dda2a2dd118a3b7b681e448f3bb354056cad44a65169f6d86", size = 1507120, upload-time = "2025-04-04T12:04:10.58Z" }, - { url = "https://files.pythonhosted.org/packages/13/ff/bc8d21dbb9bc8705126e875438a1969c4f77e03fc8565d6901c7933a3d01/pyzmq-26.4.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1c0b5fceadbab461578daf8d1dcc918ebe7ddd2952f748cf30c7cf2de5d51101", size = 1406738, upload-time = "2025-04-04T12:04:12.509Z" }, - { url = 
"https://files.pythonhosted.org/packages/f5/5d/d4cd85b24de71d84d81229e3bbb13392b2698432cf8fdcea5afda253d587/pyzmq-26.4.0-cp313-cp313-win32.whl", hash = "sha256:28e2b0ff5ba4b3dd11062d905682bad33385cfa3cc03e81abd7f0822263e6637", size = 577826, upload-time = "2025-04-04T12:04:14.289Z" }, - { url = "https://files.pythonhosted.org/packages/c6/6c/f289c1789d7bb6e5a3b3bef7b2a55089b8561d17132be7d960d3ff33b14e/pyzmq-26.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:23ecc9d241004c10e8b4f49d12ac064cd7000e1643343944a10df98e57bc544b", size = 640406, upload-time = "2025-04-04T12:04:15.757Z" }, - { url = "https://files.pythonhosted.org/packages/b3/99/676b8851cb955eb5236a0c1e9ec679ea5ede092bf8bf2c8a68d7e965cac3/pyzmq-26.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:1edb0385c7f025045d6e0f759d4d3afe43c17a3d898914ec6582e6f464203c08", size = 556216, upload-time = "2025-04-04T12:04:17.212Z" }, - { url = "https://files.pythonhosted.org/packages/65/c2/1fac340de9d7df71efc59d9c50fc7a635a77b103392d1842898dd023afcb/pyzmq-26.4.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:93a29e882b2ba1db86ba5dd5e88e18e0ac6b627026c5cfbec9983422011b82d4", size = 1333769, upload-time = "2025-04-04T12:04:18.665Z" }, - { url = "https://files.pythonhosted.org/packages/5c/c7/6c03637e8d742c3b00bec4f5e4cd9d1c01b2f3694c6f140742e93ca637ed/pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb45684f276f57110bb89e4300c00f1233ca631f08f5f42528a5c408a79efc4a", size = 658826, upload-time = "2025-04-04T12:04:20.405Z" }, - { url = "https://files.pythonhosted.org/packages/a5/97/a8dca65913c0f78e0545af2bb5078aebfc142ca7d91cdaffa1fbc73e5dbd/pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f72073e75260cb301aad4258ad6150fa7f57c719b3f498cb91e31df16784d89b", size = 891650, upload-time = "2025-04-04T12:04:22.413Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/7e/f63af1031eb060bf02d033732b910fe48548dcfdbe9c785e9f74a6cc6ae4/pyzmq-26.4.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be37e24b13026cfedd233bcbbccd8c0bcd2fdd186216094d095f60076201538d", size = 849776, upload-time = "2025-04-04T12:04:23.959Z" }, - { url = "https://files.pythonhosted.org/packages/f6/fa/1a009ce582802a895c0d5fe9413f029c940a0a8ee828657a3bb0acffd88b/pyzmq-26.4.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:237b283044934d26f1eeff4075f751b05d2f3ed42a257fc44386d00df6a270cf", size = 842516, upload-time = "2025-04-04T12:04:25.449Z" }, - { url = "https://files.pythonhosted.org/packages/6e/bc/f88b0bad0f7a7f500547d71e99f10336f2314e525d4ebf576a1ea4a1d903/pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:b30f862f6768b17040929a68432c8a8be77780317f45a353cb17e423127d250c", size = 1189183, upload-time = "2025-04-04T12:04:27.035Z" }, - { url = "https://files.pythonhosted.org/packages/d9/8c/db446a3dd9cf894406dec2e61eeffaa3c07c3abb783deaebb9812c4af6a5/pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_i686.whl", hash = "sha256:c80fcd3504232f13617c6ab501124d373e4895424e65de8b72042333316f64a8", size = 1495501, upload-time = "2025-04-04T12:04:28.833Z" }, - { url = "https://files.pythonhosted.org/packages/05/4c/bf3cad0d64c3214ac881299c4562b815f05d503bccc513e3fd4fdc6f67e4/pyzmq-26.4.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:26a2a7451606b87f67cdeca2c2789d86f605da08b4bd616b1a9981605ca3a364", size = 1395540, upload-time = "2025-04-04T12:04:30.562Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/f1/06/50a4e9648b3e8b992bef8eb632e457307553a89d294103213cfd47b3da69/pyzmq-27.0.0.tar.gz", hash = "sha256:b1f08eeb9ce1510e6939b6e5dcd46a17765e2333daae78ecf4606808442e52cf", size = 280478, upload-time = "2025-06-13T14:09:07.087Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/93/a7/9ad68f55b8834ede477842214feba6a4c786d936c022a67625497aacf61d/pyzmq-27.0.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:cbabc59dcfaac66655c040dfcb8118f133fb5dde185e5fc152628354c1598e52", size = 1305438, upload-time = "2025-06-13T14:07:31.676Z" }, + { url = "https://files.pythonhosted.org/packages/ba/ee/26aa0f98665a22bc90ebe12dced1de5f3eaca05363b717f6fb229b3421b3/pyzmq-27.0.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:cb0ac5179cba4b2f94f1aa208fbb77b62c4c9bf24dd446278b8b602cf85fcda3", size = 895095, upload-time = "2025-06-13T14:07:33.104Z" }, + { url = "https://files.pythonhosted.org/packages/cf/85/c57e7ab216ecd8aa4cc7e3b83b06cc4e9cf45c87b0afc095f10cd5ce87c1/pyzmq-27.0.0-cp312-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53a48f0228eab6cbf69fde3aa3c03cbe04e50e623ef92ae395fce47ef8a76152", size = 651826, upload-time = "2025-06-13T14:07:34.831Z" }, + { url = "https://files.pythonhosted.org/packages/69/9a/9ea7e230feda9400fb0ae0d61d7d6ddda635e718d941c44eeab22a179d34/pyzmq-27.0.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:111db5f395e09f7e775f759d598f43cb815fc58e0147623c4816486e1a39dc22", size = 839750, upload-time = "2025-06-13T14:07:36.553Z" }, + { url = "https://files.pythonhosted.org/packages/08/66/4cebfbe71f3dfbd417011daca267539f62ed0fbc68105357b68bbb1a25b7/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c8878011653dcdc27cc2c57e04ff96f0471e797f5c19ac3d7813a245bcb24371", size = 1641357, upload-time = "2025-06-13T14:07:38.21Z" }, + { url = "https://files.pythonhosted.org/packages/ac/f6/b0f62578c08d2471c791287149cb8c2aaea414ae98c6e995c7dbe008adfb/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:c0ed2c1f335ba55b5fdc964622254917d6b782311c50e138863eda409fbb3b6d", size = 2020281, upload-time = "2025-06-13T14:07:39.599Z" }, + { url = 
"https://files.pythonhosted.org/packages/37/b9/4f670b15c7498495da9159edc374ec09c88a86d9cd5a47d892f69df23450/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e918d70862d4cfd4b1c187310015646a14e1f5917922ab45b29f28f345eeb6be", size = 1877110, upload-time = "2025-06-13T14:07:41.027Z" }, + { url = "https://files.pythonhosted.org/packages/66/31/9dee25c226295b740609f0d46db2fe972b23b6f5cf786360980524a3ba92/pyzmq-27.0.0-cp312-abi3-win32.whl", hash = "sha256:88b4e43cab04c3c0f0d55df3b1eef62df2b629a1a369b5289a58f6fa8b07c4f4", size = 559297, upload-time = "2025-06-13T14:07:42.533Z" }, + { url = "https://files.pythonhosted.org/packages/9b/12/52da5509800f7ff2d287b2f2b4e636e7ea0f001181cba6964ff6c1537778/pyzmq-27.0.0-cp312-abi3-win_amd64.whl", hash = "sha256:dce4199bf5f648a902ce37e7b3afa286f305cd2ef7a8b6ec907470ccb6c8b371", size = 619203, upload-time = "2025-06-13T14:07:43.843Z" }, + { url = "https://files.pythonhosted.org/packages/93/6d/7f2e53b19d1edb1eb4f09ec7c3a1f945ca0aac272099eab757d15699202b/pyzmq-27.0.0-cp312-abi3-win_arm64.whl", hash = "sha256:56e46bbb85d52c1072b3f809cc1ce77251d560bc036d3a312b96db1afe76db2e", size = 551927, upload-time = "2025-06-13T14:07:45.51Z" }, + { url = "https://files.pythonhosted.org/packages/19/62/876b27c4ff777db4ceba1c69ea90d3c825bb4f8d5e7cd987ce5802e33c55/pyzmq-27.0.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:c36ad534c0c29b4afa088dc53543c525b23c0797e01b69fef59b1a9c0e38b688", size = 1340826, upload-time = "2025-06-13T14:07:46.881Z" }, + { url = "https://files.pythonhosted.org/packages/43/69/58ef8f4f59d3bcd505260c73bee87b008850f45edca40ddaba54273c35f4/pyzmq-27.0.0-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:67855c14173aec36395d7777aaba3cc527b393821f30143fd20b98e1ff31fd38", size = 897283, upload-time = "2025-06-13T14:07:49.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/15/93a0d0396700a60475ad3c5d42c5f1c308d3570bc94626b86c71ef9953e0/pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8617c7d43cd8ccdb62aebe984bfed77ca8f036e6c3e46dd3dddda64b10f0ab7a", size = 660567, upload-time = "2025-06-13T14:07:51.364Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b3/fe055513e498ca32f64509abae19b9c9eb4d7c829e02bd8997dd51b029eb/pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67bfbcbd0a04c575e8103a6061d03e393d9f80ffdb9beb3189261e9e9bc5d5e9", size = 847681, upload-time = "2025-06-13T14:07:52.77Z" }, + { url = "https://files.pythonhosted.org/packages/b6/4f/ff15300b00b5b602191f3df06bbc8dd4164e805fdd65bb77ffbb9c5facdc/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5cd11d46d7b7e5958121b3eaf4cd8638eff3a720ec527692132f05a57f14341d", size = 1650148, upload-time = "2025-06-13T14:07:54.178Z" }, + { url = "https://files.pythonhosted.org/packages/c4/6f/84bdfff2a224a6f26a24249a342e5906993c50b0761e311e81b39aef52a7/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:b801c2e40c5aa6072c2f4876de8dccd100af6d9918d4d0d7aa54a1d982fd4f44", size = 2023768, upload-time = "2025-06-13T14:07:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/64/39/dc2db178c26a42228c5ac94a9cc595030458aa64c8d796a7727947afbf55/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:20d5cb29e8c5f76a127c75b6e7a77e846bc4b655c373baa098c26a61b7ecd0ef", size = 1885199, upload-time = "2025-06-13T14:07:57.166Z" }, + { url = "https://files.pythonhosted.org/packages/c7/21/dae7b06a1f8cdee5d8e7a63d99c5d129c401acc40410bef2cbf42025e26f/pyzmq-27.0.0-cp313-cp313t-win32.whl", hash = "sha256:a20528da85c7ac7a19b7384e8c3f8fa707841fd85afc4ed56eda59d93e3d98ad", size = 575439, upload-time = "2025-06-13T14:07:58.959Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/bc/1709dc55f0970cf4cb8259e435e6773f9946f41a045c2cb90e870b7072da/pyzmq-27.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d8229f2efece6a660ee211d74d91dbc2a76b95544d46c74c615e491900dc107f", size = 639933, upload-time = "2025-06-13T14:08:00.777Z" }, ] [[package]] @@ -4209,40 +4219,40 @@ wheels = [ [[package]] name = "scipy" -version = "1.15.3" +version = "1.16.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, - { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, - { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, - { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, 
upload-time = "2025-05-08T16:06:20.394Z" }, - { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, - { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, - { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, - { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, - { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, - { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, - { url = 
"https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, - { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, - { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, - { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, - { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, - { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, - { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, - { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, - { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, - { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, - { url = 
"https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, - { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, - { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/81/18/b06a83f0c5ee8cddbde5e3f3d0bb9b702abfa5136ef6d4620ff67df7eee5/scipy-1.16.0.tar.gz", hash = "sha256:b5ef54021e832869c8cfb03bc3bf20366cbcd426e02a58e8a58d7584dfbb8f62", size = 30581216, upload-time = "2025-06-22T16:27:55.782Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/01/c0/c943bc8d2bbd28123ad0f4f1eef62525fa1723e84d136b32965dcb6bad3a/scipy-1.16.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7eb6bd33cef4afb9fa5f1fb25df8feeb1e52d94f21a44f1d17805b41b1da3180", size = 36459071, upload-time = "2025-06-22T16:19:06.605Z" }, + { url = "https://files.pythonhosted.org/packages/99/0d/270e2e9f1a4db6ffbf84c9a0b648499842046e4e0d9b2275d150711b3aba/scipy-1.16.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:1dbc8fdba23e4d80394ddfab7a56808e3e6489176d559c6c71935b11a2d59db1", size = 28490500, upload-time = "2025-06-22T16:19:11.775Z" }, + { url = "https://files.pythonhosted.org/packages/1c/22/01d7ddb07cff937d4326198ec8d10831367a708c3da72dfd9b7ceaf13028/scipy-1.16.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:7dcf42c380e1e3737b343dec21095c9a9ad3f9cbe06f9c05830b44b1786c9e90", size = 20762345, upload-time = "2025-06-22T16:19:15.813Z" }, + { url = "https://files.pythonhosted.org/packages/34/7f/87fd69856569ccdd2a5873fe5d7b5bbf2ad9289d7311d6a3605ebde3a94b/scipy-1.16.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26ec28675f4a9d41587266084c626b02899db373717d9312fa96ab17ca1ae94d", size = 23418563, upload-time = "2025-06-22T16:19:20.746Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f1/e4f4324fef7f54160ab749efbab6a4bf43678a9eb2e9817ed71a0a2fd8de/scipy-1.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:952358b7e58bd3197cfbd2f2f2ba829f258404bdf5db59514b515a8fe7a36c52", size = 33203951, upload-time = "2025-06-22T16:19:25.813Z" }, + { url = "https://files.pythonhosted.org/packages/6d/f0/b6ac354a956384fd8abee2debbb624648125b298f2c4a7b4f0d6248048a5/scipy-1.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03931b4e870c6fef5b5c0970d52c9f6ddd8c8d3e934a98f09308377eba6f3824", size = 35070225, upload-time = "2025-06-22T16:19:31.416Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/73/5cbe4a3fd4bc3e2d67ffad02c88b83edc88f381b73ab982f48f3df1a7790/scipy-1.16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:512c4f4f85912767c351a0306824ccca6fd91307a9f4318efe8fdbd9d30562ef", size = 35389070, upload-time = "2025-06-22T16:19:37.387Z" }, + { url = "https://files.pythonhosted.org/packages/86/e8/a60da80ab9ed68b31ea5a9c6dfd3c2f199347429f229bf7f939a90d96383/scipy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e69f798847e9add03d512eaf5081a9a5c9a98757d12e52e6186ed9681247a1ac", size = 37825287, upload-time = "2025-06-22T16:19:43.375Z" }, + { url = "https://files.pythonhosted.org/packages/ea/b5/29fece1a74c6a94247f8a6fb93f5b28b533338e9c34fdcc9cfe7a939a767/scipy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:adf9b1999323ba335adc5d1dc7add4781cb5a4b0ef1e98b79768c05c796c4e49", size = 38431929, upload-time = "2025-06-22T16:19:49.385Z" }, + { url = "https://files.pythonhosted.org/packages/46/95/0746417bc24be0c2a7b7563946d61f670a3b491b76adede420e9d173841f/scipy-1.16.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:e9f414cbe9ca289a73e0cc92e33a6a791469b6619c240aa32ee18abdce8ab451", size = 36418162, upload-time = "2025-06-22T16:19:56.3Z" }, + { url = "https://files.pythonhosted.org/packages/19/5a/914355a74481b8e4bbccf67259bbde171348a3f160b67b4945fbc5f5c1e5/scipy-1.16.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:bbba55fb97ba3cdef9b1ee973f06b09d518c0c7c66a009c729c7d1592be1935e", size = 28465985, upload-time = "2025-06-22T16:20:01.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/46/63477fc1246063855969cbefdcee8c648ba4b17f67370bd542ba56368d0b/scipy-1.16.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:58e0d4354eacb6004e7aa1cd350e5514bd0270acaa8d5b36c0627bb3bb486974", size = 20737961, upload-time = "2025-06-22T16:20:05.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/86/0fbb5588b73555e40f9d3d6dde24ee6fac7d8e301a27f6f0cab9d8f66ff2/scipy-1.16.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:75b2094ec975c80efc273567436e16bb794660509c12c6a31eb5c195cbf4b6dc", size = 23377941, upload-time = "2025-06-22T16:20:10.668Z" }, + { url = "https://files.pythonhosted.org/packages/ca/80/a561f2bf4c2da89fa631b3cbf31d120e21ea95db71fd9ec00cb0247c7a93/scipy-1.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b65d232157a380fdd11a560e7e21cde34fdb69d65c09cb87f6cc024ee376351", size = 33196703, upload-time = "2025-06-22T16:20:16.097Z" }, + { url = "https://files.pythonhosted.org/packages/11/6b/3443abcd0707d52e48eb315e33cc669a95e29fc102229919646f5a501171/scipy-1.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d8747f7736accd39289943f7fe53a8333be7f15a82eea08e4afe47d79568c32", size = 35083410, upload-time = "2025-06-22T16:20:21.734Z" }, + { url = "https://files.pythonhosted.org/packages/20/ab/eb0fc00e1e48961f1bd69b7ad7e7266896fe5bad4ead91b5fc6b3561bba4/scipy-1.16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eb9f147a1b8529bb7fec2a85cf4cf42bdfadf9e83535c309a11fdae598c88e8b", size = 35387829, upload-time = "2025-06-22T16:20:27.548Z" }, + { url = "https://files.pythonhosted.org/packages/57/9e/d6fc64e41fad5d481c029ee5a49eefc17f0b8071d636a02ceee44d4a0de2/scipy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d2b83c37edbfa837a8923d19c749c1935ad3d41cf196006a24ed44dba2ec4358", size = 37841356, upload-time = "2025-06-22T16:20:35.112Z" }, + { url = "https://files.pythonhosted.org/packages/7c/a7/4c94bbe91f12126b8bf6709b2471900577b7373a4fd1f431f28ba6f81115/scipy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:79a3c13d43c95aa80b87328a46031cf52508cf5f4df2767602c984ed1d3c6bbe", size = 38403710, upload-time = "2025-06-22T16:21:54.473Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/20/965da8497f6226e8fa90ad3447b82ed0e28d942532e92dd8b91b43f100d4/scipy-1.16.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:f91b87e1689f0370690e8470916fe1b2308e5b2061317ff76977c8f836452a47", size = 36813833, upload-time = "2025-06-22T16:20:43.925Z" }, + { url = "https://files.pythonhosted.org/packages/28/f4/197580c3dac2d234e948806e164601c2df6f0078ed9f5ad4a62685b7c331/scipy-1.16.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:88a6ca658fb94640079e7a50b2ad3b67e33ef0f40e70bdb7dc22017dae73ac08", size = 28974431, upload-time = "2025-06-22T16:20:51.302Z" }, + { url = "https://files.pythonhosted.org/packages/8a/fc/e18b8550048d9224426e76906694c60028dbdb65d28b1372b5503914b89d/scipy-1.16.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ae902626972f1bd7e4e86f58fd72322d7f4ec7b0cfc17b15d4b7006efc385176", size = 21246454, upload-time = "2025-06-22T16:20:57.276Z" }, + { url = "https://files.pythonhosted.org/packages/8c/48/07b97d167e0d6a324bfd7484cd0c209cc27338b67e5deadae578cf48e809/scipy-1.16.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:8cb824c1fc75ef29893bc32b3ddd7b11cf9ab13c1127fe26413a05953b8c32ed", size = 23772979, upload-time = "2025-06-22T16:21:03.363Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4f/9efbd3f70baf9582edf271db3002b7882c875ddd37dc97f0f675ad68679f/scipy-1.16.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:de2db7250ff6514366a9709c2cba35cb6d08498e961cba20d7cff98a7ee88938", size = 33341972, upload-time = "2025-06-22T16:21:11.14Z" }, + { url = "https://files.pythonhosted.org/packages/3f/dc/9e496a3c5dbe24e76ee24525155ab7f659c20180bab058ef2c5fa7d9119c/scipy-1.16.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e85800274edf4db8dd2e4e93034f92d1b05c9421220e7ded9988b16976f849c1", size = 35185476, upload-time = "2025-06-22T16:21:19.156Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/b3/21001cff985a122ba434c33f2c9d7d1dc3b669827e94f4fc4e1fe8b9dfd8/scipy-1.16.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4f720300a3024c237ace1cb11f9a84c38beb19616ba7c4cdcd771047a10a1706", size = 35570990, upload-time = "2025-06-22T16:21:27.797Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d3/7ba42647d6709251cdf97043d0c107e0317e152fa2f76873b656b509ff55/scipy-1.16.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aad603e9339ddb676409b104c48a027e9916ce0d2838830691f39552b38a352e", size = 37950262, upload-time = "2025-06-22T16:21:36.976Z" }, + { url = "https://files.pythonhosted.org/packages/eb/c4/231cac7a8385394ebbbb4f1ca662203e9d8c332825ab4f36ffc3ead09a42/scipy-1.16.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f56296fefca67ba605fd74d12f7bd23636267731a72cb3947963e76b8c0a25db", size = 38515076, upload-time = "2025-06-22T16:21:45.694Z" }, ] [[package]] @@ -4263,15 +4273,15 @@ wheels = [ [[package]] name = "sentry-sdk" -version = "2.29.1" +version = "2.31.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/22/67/d552a5f8e5a6a56b2feea6529e2d8ccd54349084c84176d5a1f7295044bc/sentry_sdk-2.29.1.tar.gz", hash = "sha256:8d4a0206b95fa5fe85e5e7517ed662e3888374bdc342c00e435e10e6d831aa6d", size = 325518, upload-time = "2025-05-19T14:27:38.512Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/45/c7ef7e12d8434fda8b61cdab432d8af64fb832480c93cdaf4bdcab7f5597/sentry_sdk-2.31.0.tar.gz", hash = "sha256:fed6d847f15105849cdf5dfdc64dcec356f936d41abb8c9d66adae45e60959ec", size = 334167, upload-time = "2025-06-24T16:36:26.066Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/e5/da07b0bd832cefd52d16f2b9bbbe31624d57552602c06631686b93ccb1bd/sentry_sdk-2.29.1-py2.py3-none-any.whl", hash = "sha256:90862fe0616ded4572da6c9dadb363121a1ae49a49e21c418f0634e9d10b4c19", size = 
341553, upload-time = "2025-05-19T14:27:36.882Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a2/9b6d8cc59f03251c583b3fec9d2f075dc09c0f6e030e0e0a3b223c6e64b2/sentry_sdk-2.31.0-py2.py3-none-any.whl", hash = "sha256:e953f5ab083e6599bab255b75d6829b33b3ddf9931a27ca00b4ab0081287e84f", size = 355638, upload-time = "2025-06-24T16:36:24.306Z" }, ] [[package]] @@ -4637,27 +4647,27 @@ wheels = [ [[package]] name = "tokenizers" -version = "0.21.1" +version = "0.21.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/92/76/5ac0c97f1117b91b7eb7323dcd61af80d72f790b4df71249a7850c195f30/tokenizers-0.21.1.tar.gz", hash = "sha256:a1bb04dc5b448985f86ecd4b05407f5a8d97cb2c0532199b2a302a604a0165ab", size = 343256, upload-time = "2025-03-13T10:51:18.189Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/2d/b0fce2b8201635f60e8c95990080f58461cc9ca3d5026de2e900f38a7f21/tokenizers-0.21.2.tar.gz", hash = "sha256:fdc7cffde3e2113ba0e6cc7318c40e3438a4d74bbc62bf04bcc63bdfb082ac77", size = 351545, upload-time = "2025-06-24T10:24:52.449Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/1f/328aee25f9115bf04262e8b4e5a2050b7b7cf44b59c74e982db7270c7f30/tokenizers-0.21.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e78e413e9e668ad790a29456e677d9d3aa50a9ad311a40905d6861ba7692cf41", size = 2780767, upload-time = "2025-03-13T10:51:09.459Z" }, - { url = "https://files.pythonhosted.org/packages/ae/1a/4526797f3719b0287853f12c5ad563a9be09d446c44ac784cdd7c50f76ab/tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:cd51cd0a91ecc801633829fcd1fda9cf8682ed3477c6243b9a095539de4aecf3", size = 2650555, upload-time = "2025-03-13T10:51:07.692Z" }, - { url = "https://files.pythonhosted.org/packages/4d/7a/a209b29f971a9fdc1da86f917fe4524564924db50d13f0724feed37b2a4d/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28da6b72d4fb14ee200a1bd386ff74ade8992d7f725f2bde2c495a9a98cf4d9f", size = 2937541, upload-time = "2025-03-13T10:50:56.679Z" }, - { url = "https://files.pythonhosted.org/packages/3c/1e/b788b50ffc6191e0b1fc2b0d49df8cff16fe415302e5ceb89f619d12c5bc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:34d8cfde551c9916cb92014e040806122295a6800914bab5865deb85623931cf", size = 2819058, upload-time = "2025-03-13T10:50:59.525Z" }, - { url = "https://files.pythonhosted.org/packages/36/aa/3626dfa09a0ecc5b57a8c58eeaeb7dd7ca9a37ad9dd681edab5acd55764c/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aaa852d23e125b73d283c98f007e06d4595732104b65402f46e8ef24b588d9f8", size = 3133278, upload-time = "2025-03-13T10:51:04.678Z" }, - { url = "https://files.pythonhosted.org/packages/a4/4d/8fbc203838b3d26269f944a89459d94c858f5b3f9a9b6ee9728cdcf69161/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a21a15d5c8e603331b8a59548bbe113564136dc0f5ad8306dd5033459a226da0", size = 3144253, upload-time = "2025-03-13T10:51:01.261Z" }, - { url = "https://files.pythonhosted.org/packages/d8/1b/2bd062adeb7c7511b847b32e356024980c0ffcf35f28947792c2d8ad2288/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fdbd4c067c60a0ac7eca14b6bd18a5bebace54eb757c706b47ea93204f7a37c", size = 3398225, upload-time = "2025-03-13T10:51:03.243Z" }, - { url = "https://files.pythonhosted.org/packages/8a/63/38be071b0c8e06840bc6046991636bcb30c27f6bb1e670f4f4bc87cf49cc/tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dd9a0061e403546f7377df940e866c3e678d7d4e9643d0461ea442b4f89e61a", size = 3038874, upload-time = "2025-03-13T10:51:06.235Z" }, - { url = "https://files.pythonhosted.org/packages/ec/83/afa94193c09246417c23a3c75a8a0a96bf44ab5630a3015538d0c316dd4b/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = 
"sha256:db9484aeb2e200c43b915a1a0150ea885e35f357a5a8fabf7373af333dcc8dbf", size = 9014448, upload-time = "2025-03-13T10:51:10.927Z" }, - { url = "https://files.pythonhosted.org/packages/ae/b3/0e1a37d4f84c0f014d43701c11eb8072704f6efe8d8fc2dcdb79c47d76de/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed248ab5279e601a30a4d67bdb897ecbe955a50f1e7bb62bd99f07dd11c2f5b6", size = 8937877, upload-time = "2025-03-13T10:51:12.688Z" }, - { url = "https://files.pythonhosted.org/packages/ac/33/ff08f50e6d615eb180a4a328c65907feb6ded0b8f990ec923969759dc379/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:9ac78b12e541d4ce67b4dfd970e44c060a2147b9b2a21f509566d556a509c67d", size = 9186645, upload-time = "2025-03-13T10:51:14.723Z" }, - { url = "https://files.pythonhosted.org/packages/5f/aa/8ae85f69a9f6012c6f8011c6f4aa1c96154c816e9eea2e1b758601157833/tokenizers-0.21.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e5a69c1a4496b81a5ee5d2c1f3f7fbdf95e90a0196101b0ee89ed9956b8a168f", size = 9384380, upload-time = "2025-03-13T10:51:16.526Z" }, - { url = "https://files.pythonhosted.org/packages/e8/5b/a5d98c89f747455e8b7a9504910c865d5e51da55e825a7ae641fb5ff0a58/tokenizers-0.21.1-cp39-abi3-win32.whl", hash = "sha256:1039a3a5734944e09de1d48761ade94e00d0fa760c0e0551151d4dd851ba63e3", size = 2239506, upload-time = "2025-03-13T10:51:20.643Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b6/072a8e053ae600dcc2ac0da81a23548e3b523301a442a6ca900e92ac35be/tokenizers-0.21.1-cp39-abi3-win_amd64.whl", hash = "sha256:0f0dcbcc9f6e13e675a66d7a5f2f225a736745ce484c1a4e07476a89ccdad382", size = 2435481, upload-time = "2025-03-13T10:51:19.243Z" }, + { url = "https://files.pythonhosted.org/packages/1d/cc/2936e2d45ceb130a21d929743f1e9897514691bec123203e10837972296f/tokenizers-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:342b5dfb75009f2255ab8dec0041287260fed5ce00c323eb6bab639066fef8ec", size = 2875206, upload-time = "2025-06-24T10:24:42.755Z" }, + { 
url = "https://files.pythonhosted.org/packages/6c/e6/33f41f2cc7861faeba8988e7a77601407bf1d9d28fc79c5903f8f77df587/tokenizers-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:126df3205d6f3a93fea80c7a8a266a78c1bd8dd2fe043386bafdd7736a23e45f", size = 2732655, upload-time = "2025-06-24T10:24:41.56Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1791eb329c07122a75b01035b1a3aa22ad139f3ce0ece1b059b506d9d9de/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a32cd81be21168bd0d6a0f0962d60177c447a1aa1b1e48fa6ec9fc728ee0b12", size = 3019202, upload-time = "2025-06-24T10:24:31.791Z" }, + { url = "https://files.pythonhosted.org/packages/05/15/fd2d8104faa9f86ac68748e6f7ece0b5eb7983c7efc3a2c197cb98c99030/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8bd8999538c405133c2ab999b83b17c08b7fc1b48c1ada2469964605a709ef91", size = 2934539, upload-time = "2025-06-24T10:24:34.567Z" }, + { url = "https://files.pythonhosted.org/packages/a5/2e/53e8fd053e1f3ffbe579ca5f9546f35ac67cf0039ed357ad7ec57f5f5af0/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e9944e61239b083a41cf8fc42802f855e1dca0f499196df37a8ce219abac6eb", size = 3248665, upload-time = "2025-06-24T10:24:39.024Z" }, + { url = "https://files.pythonhosted.org/packages/00/15/79713359f4037aa8f4d1f06ffca35312ac83629da062670e8830917e2153/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:514cd43045c5d546f01142ff9c79a96ea69e4b5cda09e3027708cb2e6d5762ab", size = 3451305, upload-time = "2025-06-24T10:24:36.133Z" }, + { url = "https://files.pythonhosted.org/packages/38/5f/959f3a8756fc9396aeb704292777b84f02a5c6f25c3fc3ba7530db5feb2c/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1b9405822527ec1e0f7d8d2fdb287a5730c3a6518189c968254a8441b21faae", size = 3214757, upload-time = "2025-06-24T10:24:37.784Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/74/f41a432a0733f61f3d21b288de6dfa78f7acff309c6f0f323b2833e9189f/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed9a4d51c395103ad24f8e7eb976811c57fbec2af9f133df471afcd922e5020", size = 3121887, upload-time = "2025-06-24T10:24:40.293Z" }, + { url = "https://files.pythonhosted.org/packages/3c/6a/bc220a11a17e5d07b0dfb3b5c628621d4dcc084bccd27cfaead659963016/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c41862df3d873665ec78b6be36fcc30a26e3d4902e9dd8608ed61d49a48bc19", size = 9091965, upload-time = "2025-06-24T10:24:44.431Z" }, + { url = "https://files.pythonhosted.org/packages/6c/bd/ac386d79c4ef20dc6f39c4706640c24823dca7ebb6f703bfe6b5f0292d88/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed21dc7e624e4220e21758b2e62893be7101453525e3d23264081c9ef9a6d00d", size = 9053372, upload-time = "2025-06-24T10:24:46.455Z" }, + { url = "https://files.pythonhosted.org/packages/63/7b/5440bf203b2a5358f074408f7f9c42884849cd9972879e10ee6b7a8c3b3d/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:0e73770507e65a0e0e2a1affd6b03c36e3bc4377bd10c9ccf51a82c77c0fe365", size = 9298632, upload-time = "2025-06-24T10:24:48.446Z" }, + { url = "https://files.pythonhosted.org/packages/a4/d2/faa1acac3f96a7427866e94ed4289949b2524f0c1878512516567d80563c/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:106746e8aa9014a12109e58d540ad5465b4c183768ea96c03cbc24c44d329958", size = 9470074, upload-time = "2025-06-24T10:24:50.378Z" }, + { url = "https://files.pythonhosted.org/packages/d8/a5/896e1ef0707212745ae9f37e84c7d50269411aef2e9ccd0de63623feecdf/tokenizers-0.21.2-cp39-abi3-win32.whl", hash = "sha256:cabda5a6d15d620b6dfe711e1af52205266d05b379ea85a8a301b3593c60e962", size = 2330115, upload-time = "2025-06-24T10:24:55.069Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/c3/cc2755ee10be859c4338c962a35b9a663788c0c0b50c0bdd8078fb6870cf/tokenizers-0.21.2-cp39-abi3-win_amd64.whl", hash = "sha256:58747bb898acdb1007f37a7bbe614346e98dc28708ffb66a3fd50ce169ac6c98", size = 2509918, upload-time = "2025-06-24T10:24:53.71Z" }, ] [[package]] @@ -4746,7 +4756,7 @@ wheels = [ [[package]] name = "torchmetrics" -version = "1.7.2" +version = "1.7.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lightning-utilities" }, @@ -4754,9 +4764,9 @@ dependencies = [ { name = "packaging" }, { name = "torch" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b7/ec/f5a4f94c77a1b4c0a37e5c5c8b666a33bc074130258a6b655346bec560c2/torchmetrics-1.7.2.tar.gz", hash = "sha256:ba401cd01aeaa268e809c0e4f42ef8f95669bf9b485e1d93d54dc765e012338a", size = 566185, upload-time = "2025-05-28T20:26:29.543Z" } +sdist = { url = "https://files.pythonhosted.org/packages/48/22/8b16c4ec34d93ee15024924cbbe84fbd235bb3e1df2cc8f48c865c1528e7/torchmetrics-1.7.3.tar.gz", hash = "sha256:08450a19cdb67ba1608aac0b213e5dc73033e11b60ad4719696ebcede591621e", size = 566545, upload-time = "2025-06-13T15:39:37.498Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/56/89/b5fd7eb99b27457d71d3b7d9eca0b884fa5992abca7672aab1177c5f22d8/torchmetrics-1.7.2-py3-none-any.whl", hash = "sha256:9cc3bff07a715fcb37fb04d2a1a5ae36267c36066c097578020056653a94f2a8", size = 962510, upload-time = "2025-05-28T20:26:27.385Z" }, + { url = "https://files.pythonhosted.org/packages/6f/f2/bed7da46003c26ed44fc7fa3ecc98a84216f0d4758e5e6a3693754d490d9/torchmetrics-1.7.3-py3-none-any.whl", hash = "sha256:7b6fd43e92f0a1071c8bcb029637f252b0630699140a93ed8817ce7afe9db34e", size = 962639, upload-time = "2025-06-13T15:39:35.69Z" }, ] [[package]] @@ -5169,38 +5179,69 @@ wheels = [ [[package]] name = "watchfiles" -version = "1.0.5" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, ] 
-sdist = { url = "https://files.pythonhosted.org/packages/03/e2/8ed598c42057de7aa5d97c472254af4906ff0a59a66699d426fc9ef795d7/watchfiles-1.0.5.tar.gz", hash = "sha256:b7529b5dcc114679d43827d8c35a07c493ad6f083633d573d81c660abc5979e9", size = 94537, upload-time = "2025-04-08T10:36:26.722Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/8c/4f0b9bdb75a1bfbd9c78fad7d8854369283f74fe7cf03eb16be77054536d/watchfiles-1.0.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5eb568c2aa6018e26da9e6c86f3ec3fd958cee7f0311b35c2630fa4217d17f2", size = 401511, upload-time = "2025-04-08T10:35:17.956Z" }, - { url = "https://files.pythonhosted.org/packages/dc/4e/7e15825def77f8bd359b6d3f379f0c9dac4eb09dd4ddd58fd7d14127179c/watchfiles-1.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0a04059f4923ce4e856b4b4e5e783a70f49d9663d22a4c3b3298165996d1377f", size = 392715, upload-time = "2025-04-08T10:35:19.202Z" }, - { url = "https://files.pythonhosted.org/packages/58/65/b72fb817518728e08de5840d5d38571466c1b4a3f724d190cec909ee6f3f/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e380c89983ce6e6fe2dd1e1921b9952fb4e6da882931abd1824c092ed495dec", size = 454138, upload-time = "2025-04-08T10:35:20.586Z" }, - { url = "https://files.pythonhosted.org/packages/3e/a4/86833fd2ea2e50ae28989f5950b5c3f91022d67092bfec08f8300d8b347b/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fe43139b2c0fdc4a14d4f8d5b5d967f7a2777fd3d38ecf5b1ec669b0d7e43c21", size = 458592, upload-time = "2025-04-08T10:35:21.87Z" }, - { url = "https://files.pythonhosted.org/packages/38/7e/42cb8df8be9a37e50dd3a818816501cf7a20d635d76d6bd65aae3dbbff68/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee0822ce1b8a14fe5a066f93edd20aada932acfe348bede8aa2149f1a4489512", size = 487532, upload-time = "2025-04-08T10:35:23.143Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/fd/13d26721c85d7f3df6169d8b495fcac8ab0dc8f0945ebea8845de4681dab/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0dbcb1c2d8f2ab6e0a81c6699b236932bd264d4cef1ac475858d16c403de74d", size = 522865, upload-time = "2025-04-08T10:35:24.702Z" }, - { url = "https://files.pythonhosted.org/packages/a1/0d/7f9ae243c04e96c5455d111e21b09087d0eeaf9a1369e13a01c7d3d82478/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a2014a2b18ad3ca53b1f6c23f8cd94a18ce930c1837bd891262c182640eb40a6", size = 499887, upload-time = "2025-04-08T10:35:25.969Z" }, - { url = "https://files.pythonhosted.org/packages/8e/0f/a257766998e26aca4b3acf2ae97dff04b57071e991a510857d3799247c67/watchfiles-1.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10f6ae86d5cb647bf58f9f655fcf577f713915a5d69057a0371bc257e2553234", size = 454498, upload-time = "2025-04-08T10:35:27.353Z" }, - { url = "https://files.pythonhosted.org/packages/81/79/8bf142575a03e0af9c3d5f8bcae911ee6683ae93a625d349d4ecf4c8f7df/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1a7bac2bde1d661fb31f4d4e8e539e178774b76db3c2c17c4bb3e960a5de07a2", size = 630663, upload-time = "2025-04-08T10:35:28.685Z" }, - { url = "https://files.pythonhosted.org/packages/f1/80/abe2e79f610e45c63a70d271caea90c49bbf93eb00fa947fa9b803a1d51f/watchfiles-1.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ab626da2fc1ac277bbf752446470b367f84b50295264d2d313e28dc4405d663", size = 625410, upload-time = "2025-04-08T10:35:30.42Z" }, - { url = "https://files.pythonhosted.org/packages/91/6f/bc7fbecb84a41a9069c2c6eb6319f7f7df113adf113e358c57fc1aff7ff5/watchfiles-1.0.5-cp312-cp312-win32.whl", hash = "sha256:9f4571a783914feda92018ef3901dab8caf5b029325b5fe4558c074582815249", size = 277965, upload-time = "2025-04-08T10:35:32.023Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/a5/bf1c297ea6649ec59e935ab311f63d8af5faa8f0b86993e3282b984263e3/watchfiles-1.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:360a398c3a19672cf93527f7e8d8b60d8275119c5d900f2e184d32483117a705", size = 291693, upload-time = "2025-04-08T10:35:33.225Z" }, - { url = "https://files.pythonhosted.org/packages/7f/7b/fd01087cc21db5c47e5beae507b87965db341cce8a86f9eb12bf5219d4e0/watchfiles-1.0.5-cp312-cp312-win_arm64.whl", hash = "sha256:1a2902ede862969077b97523987c38db28abbe09fb19866e711485d9fbf0d417", size = 283287, upload-time = "2025-04-08T10:35:34.568Z" }, - { url = "https://files.pythonhosted.org/packages/c7/62/435766874b704f39b2fecd8395a29042db2b5ec4005bd34523415e9bd2e0/watchfiles-1.0.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0b289572c33a0deae62daa57e44a25b99b783e5f7aed81b314232b3d3c81a11d", size = 401531, upload-time = "2025-04-08T10:35:35.792Z" }, - { url = "https://files.pythonhosted.org/packages/6e/a6/e52a02c05411b9cb02823e6797ef9bbba0bfaf1bb627da1634d44d8af833/watchfiles-1.0.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a056c2f692d65bf1e99c41045e3bdcaea3cb9e6b5a53dcaf60a5f3bd95fc9763", size = 392417, upload-time = "2025-04-08T10:35:37.048Z" }, - { url = "https://files.pythonhosted.org/packages/3f/53/c4af6819770455932144e0109d4854437769672d7ad897e76e8e1673435d/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9dca99744991fc9850d18015c4f0438865414e50069670f5f7eee08340d8b40", size = 453423, upload-time = "2025-04-08T10:35:38.357Z" }, - { url = "https://files.pythonhosted.org/packages/cb/d1/8e88df58bbbf819b8bc5cfbacd3c79e01b40261cad0fc84d1e1ebd778a07/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:894342d61d355446d02cd3988a7326af344143eb33a2fd5d38482a92072d9563", size = 458185, upload-time = "2025-04-08T10:35:39.708Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/70/fffaa11962dd5429e47e478a18736d4e42bec42404f5ee3b92ef1b87ad60/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ab44e1580924d1ffd7b3938e02716d5ad190441965138b4aa1d1f31ea0877f04", size = 486696, upload-time = "2025-04-08T10:35:41.469Z" }, - { url = "https://files.pythonhosted.org/packages/39/db/723c0328e8b3692d53eb273797d9a08be6ffb1d16f1c0ba2bdbdc2a3852c/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6f9367b132078b2ceb8d066ff6c93a970a18c3029cea37bfd7b2d3dd2e5db8f", size = 522327, upload-time = "2025-04-08T10:35:43.289Z" }, - { url = "https://files.pythonhosted.org/packages/cd/05/9fccc43c50c39a76b68343484b9da7b12d42d0859c37c61aec018c967a32/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2e55a9b162e06e3f862fb61e399fe9f05d908d019d87bf5b496a04ef18a970a", size = 499741, upload-time = "2025-04-08T10:35:44.574Z" }, - { url = "https://files.pythonhosted.org/packages/23/14/499e90c37fa518976782b10a18b18db9f55ea73ca14641615056f8194bb3/watchfiles-1.0.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0125f91f70e0732a9f8ee01e49515c35d38ba48db507a50c5bdcad9503af5827", size = 453995, upload-time = "2025-04-08T10:35:46.336Z" }, - { url = "https://files.pythonhosted.org/packages/61/d9/f75d6840059320df5adecd2c687fbc18960a7f97b55c300d20f207d48aef/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13bb21f8ba3248386337c9fa51c528868e6c34a707f729ab041c846d52a0c69a", size = 629693, upload-time = "2025-04-08T10:35:48.161Z" }, - { url = "https://files.pythonhosted.org/packages/fc/17/180ca383f5061b61406477218c55d66ec118e6c0c51f02d8142895fcf0a9/watchfiles-1.0.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:839ebd0df4a18c5b3c1b890145b5a3f5f64063c2a0d02b13c76d78fe5de34936", size = 624677, upload-time = "2025-04-08T10:35:49.65Z" }, - { url = 
"https://files.pythonhosted.org/packages/bf/15/714d6ef307f803f236d69ee9d421763707899d6298d9f3183e55e366d9af/watchfiles-1.0.5-cp313-cp313-win32.whl", hash = "sha256:4a8ec1e4e16e2d5bafc9ba82f7aaecfeec990ca7cd27e84fb6f191804ed2fcfc", size = 277804, upload-time = "2025-04-08T10:35:51.093Z" }, - { url = "https://files.pythonhosted.org/packages/a8/b4/c57b99518fadf431f3ef47a610839e46e5f8abf9814f969859d1c65c02c7/watchfiles-1.0.5-cp313-cp313-win_amd64.whl", hash = "sha256:f436601594f15bf406518af922a89dcaab416568edb6f65c4e5bbbad1ea45c11", size = 291087, upload-time = "2025-04-08T10:35:52.458Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/2a/9a/d451fcc97d029f5812e898fd30a53fd8c15c7bbd058fd75cfc6beb9bd761/watchfiles-1.1.0.tar.gz", hash = "sha256:693ed7ec72cbfcee399e92c895362b6e66d63dac6b91e2c11ae03d10d503e575", size = 94406, upload-time = "2025-06-15T19:06:59.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/b8/858957045a38a4079203a33aaa7d23ea9269ca7761c8a074af3524fbb240/watchfiles-1.1.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9dc001c3e10de4725c749d4c2f2bdc6ae24de5a88a339c4bce32300a31ede179", size = 402339, upload-time = "2025-06-15T19:05:24.516Z" }, + { url = "https://files.pythonhosted.org/packages/80/28/98b222cca751ba68e88521fabd79a4fab64005fc5976ea49b53fa205d1fa/watchfiles-1.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9ba68ec283153dead62cbe81872d28e053745f12335d037de9cbd14bd1877f5", size = 394409, upload-time = "2025-06-15T19:05:25.469Z" }, + { url = "https://files.pythonhosted.org/packages/86/50/dee79968566c03190677c26f7f47960aff738d32087087bdf63a5473e7df/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130fc497b8ee68dce163e4254d9b0356411d1490e868bd8790028bc46c5cc297", size = 450939, upload-time = "2025-06-15T19:05:26.494Z" }, + { url = 
"https://files.pythonhosted.org/packages/40/45/a7b56fb129700f3cfe2594a01aa38d033b92a33dddce86c8dfdfc1247b72/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50a51a90610d0845a5931a780d8e51d7bd7f309ebc25132ba975aca016b576a0", size = 457270, upload-time = "2025-06-15T19:05:27.466Z" }, + { url = "https://files.pythonhosted.org/packages/b5/c8/fa5ef9476b1d02dc6b5e258f515fcaaecf559037edf8b6feffcbc097c4b8/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc44678a72ac0910bac46fa6a0de6af9ba1355669b3dfaf1ce5f05ca7a74364e", size = 483370, upload-time = "2025-06-15T19:05:28.548Z" }, + { url = "https://files.pythonhosted.org/packages/98/68/42cfcdd6533ec94f0a7aab83f759ec11280f70b11bfba0b0f885e298f9bd/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a543492513a93b001975ae283a51f4b67973662a375a403ae82f420d2c7205ee", size = 598654, upload-time = "2025-06-15T19:05:29.997Z" }, + { url = "https://files.pythonhosted.org/packages/d3/74/b2a1544224118cc28df7e59008a929e711f9c68ce7d554e171b2dc531352/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ac164e20d17cc285f2b94dc31c384bc3aa3dd5e7490473b3db043dd70fbccfd", size = 478667, upload-time = "2025-06-15T19:05:31.172Z" }, + { url = "https://files.pythonhosted.org/packages/8c/77/e3362fe308358dc9f8588102481e599c83e1b91c2ae843780a7ded939a35/watchfiles-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7590d5a455321e53857892ab8879dce62d1f4b04748769f5adf2e707afb9d4f", size = 452213, upload-time = "2025-06-15T19:05:32.299Z" }, + { url = "https://files.pythonhosted.org/packages/6e/17/c8f1a36540c9a1558d4faf08e909399e8133599fa359bf52ec8fcee5be6f/watchfiles-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:37d3d3f7defb13f62ece99e9be912afe9dd8a0077b7c45ee5a57c74811d581a4", size = 626718, upload-time = "2025-06-15T19:05:33.415Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/45/fb599be38b4bd38032643783d7496a26a6f9ae05dea1a42e58229a20ac13/watchfiles-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:7080c4bb3efd70a07b1cc2df99a7aa51d98685be56be6038c3169199d0a1c69f", size = 623098, upload-time = "2025-06-15T19:05:34.534Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e7/fdf40e038475498e160cd167333c946e45d8563ae4dd65caf757e9ffe6b4/watchfiles-1.1.0-cp312-cp312-win32.whl", hash = "sha256:cbcf8630ef4afb05dc30107bfa17f16c0896bb30ee48fc24bf64c1f970f3b1fd", size = 279209, upload-time = "2025-06-15T19:05:35.577Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d3/3ae9d5124ec75143bdf088d436cba39812122edc47709cd2caafeac3266f/watchfiles-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:cbd949bdd87567b0ad183d7676feb98136cde5bb9025403794a4c0db28ed3a47", size = 292786, upload-time = "2025-06-15T19:05:36.559Z" }, + { url = "https://files.pythonhosted.org/packages/26/2f/7dd4fc8b5f2b34b545e19629b4a018bfb1de23b3a496766a2c1165ca890d/watchfiles-1.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:0a7d40b77f07be87c6faa93d0951a0fcd8cbca1ddff60a1b65d741bac6f3a9f6", size = 284343, upload-time = "2025-06-15T19:05:37.5Z" }, + { url = "https://files.pythonhosted.org/packages/d3/42/fae874df96595556a9089ade83be34a2e04f0f11eb53a8dbf8a8a5e562b4/watchfiles-1.1.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5007f860c7f1f8df471e4e04aaa8c43673429047d63205d1630880f7637bca30", size = 402004, upload-time = "2025-06-15T19:05:38.499Z" }, + { url = "https://files.pythonhosted.org/packages/fa/55/a77e533e59c3003d9803c09c44c3651224067cbe7fb5d574ddbaa31e11ca/watchfiles-1.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:20ecc8abbd957046f1fe9562757903f5eaf57c3bce70929fda6c7711bb58074a", size = 393671, upload-time = "2025-06-15T19:05:39.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/68/b0afb3f79c8e832e6571022611adbdc36e35a44e14f129ba09709aa4bb7a/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2f0498b7d2a3c072766dba3274fe22a183dbea1f99d188f1c6c72209a1063dc", size = 449772, upload-time = "2025-06-15T19:05:40.897Z" }, + { url = "https://files.pythonhosted.org/packages/ff/05/46dd1f6879bc40e1e74c6c39a1b9ab9e790bf1f5a2fe6c08b463d9a807f4/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:239736577e848678e13b201bba14e89718f5c2133dfd6b1f7846fa1b58a8532b", size = 456789, upload-time = "2025-06-15T19:05:42.045Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ca/0eeb2c06227ca7f12e50a47a3679df0cd1ba487ea19cf844a905920f8e95/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eff4b8d89f444f7e49136dc695599a591ff769300734446c0a86cba2eb2f9895", size = 482551, upload-time = "2025-06-15T19:05:43.781Z" }, + { url = "https://files.pythonhosted.org/packages/31/47/2cecbd8694095647406645f822781008cc524320466ea393f55fe70eed3b/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12b0a02a91762c08f7264e2e79542f76870c3040bbc847fb67410ab81474932a", size = 597420, upload-time = "2025-06-15T19:05:45.244Z" }, + { url = "https://files.pythonhosted.org/packages/d9/7e/82abc4240e0806846548559d70f0b1a6dfdca75c1b4f9fa62b504ae9b083/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:29e7bc2eee15cbb339c68445959108803dc14ee0c7b4eea556400131a8de462b", size = 477950, upload-time = "2025-06-15T19:05:46.332Z" }, + { url = "https://files.pythonhosted.org/packages/25/0d/4d564798a49bf5482a4fa9416dea6b6c0733a3b5700cb8a5a503c4b15853/watchfiles-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9481174d3ed982e269c090f780122fb59cee6c3796f74efe74e70f7780ed94c", size = 451706, upload-time = 
"2025-06-15T19:05:47.459Z" }, + { url = "https://files.pythonhosted.org/packages/81/b5/5516cf46b033192d544102ea07c65b6f770f10ed1d0a6d388f5d3874f6e4/watchfiles-1.1.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:80f811146831c8c86ab17b640801c25dc0a88c630e855e2bef3568f30434d52b", size = 625814, upload-time = "2025-06-15T19:05:48.654Z" }, + { url = "https://files.pythonhosted.org/packages/0c/dd/7c1331f902f30669ac3e754680b6edb9a0dd06dea5438e61128111fadd2c/watchfiles-1.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:60022527e71d1d1fda67a33150ee42869042bce3d0fcc9cc49be009a9cded3fb", size = 622820, upload-time = "2025-06-15T19:05:50.088Z" }, + { url = "https://files.pythonhosted.org/packages/1b/14/36d7a8e27cd128d7b1009e7715a7c02f6c131be9d4ce1e5c3b73d0e342d8/watchfiles-1.1.0-cp313-cp313-win32.whl", hash = "sha256:32d6d4e583593cb8576e129879ea0991660b935177c0f93c6681359b3654bfa9", size = 279194, upload-time = "2025-06-15T19:05:51.186Z" }, + { url = "https://files.pythonhosted.org/packages/25/41/2dd88054b849aa546dbeef5696019c58f8e0774f4d1c42123273304cdb2e/watchfiles-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:f21af781a4a6fbad54f03c598ab620e3a77032c5878f3d780448421a6e1818c7", size = 292349, upload-time = "2025-06-15T19:05:52.201Z" }, + { url = "https://files.pythonhosted.org/packages/c8/cf/421d659de88285eb13941cf11a81f875c176f76a6d99342599be88e08d03/watchfiles-1.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:5366164391873ed76bfdf618818c82084c9db7fac82b64a20c44d335eec9ced5", size = 283836, upload-time = "2025-06-15T19:05:53.265Z" }, + { url = "https://files.pythonhosted.org/packages/45/10/6faf6858d527e3599cc50ec9fcae73590fbddc1420bd4fdccfebffeedbc6/watchfiles-1.1.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:17ab167cca6339c2b830b744eaf10803d2a5b6683be4d79d8475d88b4a8a4be1", size = 400343, upload-time = "2025-06-15T19:05:54.252Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/20/5cb7d3966f5e8c718006d0e97dfe379a82f16fecd3caa7810f634412047a/watchfiles-1.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:328dbc9bff7205c215a7807da7c18dce37da7da718e798356212d22696404339", size = 392916, upload-time = "2025-06-15T19:05:55.264Z" }, + { url = "https://files.pythonhosted.org/packages/8c/07/d8f1176328fa9e9581b6f120b017e286d2a2d22ae3f554efd9515c8e1b49/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7208ab6e009c627b7557ce55c465c98967e8caa8b11833531fdf95799372633", size = 449582, upload-time = "2025-06-15T19:05:56.317Z" }, + { url = "https://files.pythonhosted.org/packages/66/e8/80a14a453cf6038e81d072a86c05276692a1826471fef91df7537dba8b46/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a8f6f72974a19efead54195bc9bed4d850fc047bb7aa971268fd9a8387c89011", size = 456752, upload-time = "2025-06-15T19:05:57.359Z" }, + { url = "https://files.pythonhosted.org/packages/5a/25/0853b3fe0e3c2f5af9ea60eb2e781eade939760239a72c2d38fc4cc335f6/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d181ef50923c29cf0450c3cd47e2f0557b62218c50b2ab8ce2ecaa02bd97e670", size = 481436, upload-time = "2025-06-15T19:05:58.447Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9e/4af0056c258b861fbb29dcb36258de1e2b857be4a9509e6298abcf31e5c9/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adb4167043d3a78280d5d05ce0ba22055c266cf8655ce942f2fb881262ff3cdf", size = 596016, upload-time = "2025-06-15T19:05:59.59Z" }, + { url = "https://files.pythonhosted.org/packages/c5/fa/95d604b58aa375e781daf350897aaaa089cff59d84147e9ccff2447c8294/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8c5701dc474b041e2934a26d31d39f90fac8a3dee2322b39f7729867f932b1d4", size = 476727, upload-time = "2025-06-15T19:06:01.086Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/95/fe479b2664f19be4cf5ceeb21be05afd491d95f142e72d26a42f41b7c4f8/watchfiles-1.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b067915e3c3936966a8607f6fe5487df0c9c4afb85226613b520890049deea20", size = 451864, upload-time = "2025-06-15T19:06:02.144Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8a/3c4af14b93a15ce55901cd7a92e1a4701910f1768c78fb30f61d2b79785b/watchfiles-1.1.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:9c733cda03b6d636b4219625a4acb5c6ffb10803338e437fb614fef9516825ef", size = 625626, upload-time = "2025-06-15T19:06:03.578Z" }, + { url = "https://files.pythonhosted.org/packages/da/f5/cf6aa047d4d9e128f4b7cde615236a915673775ef171ff85971d698f3c2c/watchfiles-1.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:cc08ef8b90d78bfac66f0def80240b0197008e4852c9f285907377b2947ffdcb", size = 622744, upload-time = "2025-06-15T19:06:05.066Z" }, + { url = "https://files.pythonhosted.org/packages/2c/00/70f75c47f05dea6fd30df90f047765f6fc2d6eb8b5a3921379b0b04defa2/watchfiles-1.1.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:9974d2f7dc561cce3bb88dfa8eb309dab64c729de85fba32e98d75cf24b66297", size = 402114, upload-time = "2025-06-15T19:06:06.186Z" }, + { url = "https://files.pythonhosted.org/packages/53/03/acd69c48db4a1ed1de26b349d94077cca2238ff98fd64393f3e97484cae6/watchfiles-1.1.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c68e9f1fcb4d43798ad8814c4c1b61547b014b667216cb754e606bfade587018", size = 393879, upload-time = "2025-06-15T19:06:07.369Z" }, + { url = "https://files.pythonhosted.org/packages/2f/c8/a9a2a6f9c8baa4eceae5887fecd421e1b7ce86802bcfc8b6a942e2add834/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95ab1594377effac17110e1352989bdd7bdfca9ff0e5eeccd8c69c5389b826d0", size = 450026, upload-time = "2025-06-15T19:06:08.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/fe/51/d572260d98388e6e2b967425c985e07d47ee6f62e6455cefb46a6e06eda5/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fba9b62da882c1be1280a7584ec4515d0a6006a94d6e5819730ec2eab60ffe12", size = 457917, upload-time = "2025-06-15T19:06:09.988Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2d/4258e52917bf9f12909b6ec314ff9636276f3542f9d3807d143f27309104/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3434e401f3ce0ed6b42569128b3d1e3af773d7ec18751b918b89cd49c14eaafb", size = 483602, upload-time = "2025-06-15T19:06:11.088Z" }, + { url = "https://files.pythonhosted.org/packages/84/99/bee17a5f341a4345fe7b7972a475809af9e528deba056f8963d61ea49f75/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa257a4d0d21fcbca5b5fcba9dca5a78011cb93c0323fb8855c6d2dfbc76eb77", size = 596758, upload-time = "2025-06-15T19:06:12.197Z" }, + { url = "https://files.pythonhosted.org/packages/40/76/e4bec1d59b25b89d2b0716b41b461ed655a9a53c60dc78ad5771fda5b3e6/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7fd1b3879a578a8ec2076c7961076df540b9af317123f84569f5a9ddee64ce92", size = 477601, upload-time = "2025-06-15T19:06:13.391Z" }, + { url = "https://files.pythonhosted.org/packages/1f/fa/a514292956f4a9ce3c567ec0c13cce427c158e9f272062685a8a727d08fc/watchfiles-1.1.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62cc7a30eeb0e20ecc5f4bd113cd69dcdb745a07c68c0370cea919f373f65d9e", size = 451936, upload-time = "2025-06-15T19:06:14.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/5d/c3bf927ec3bbeb4566984eba8dd7a8eb69569400f5509904545576741f88/watchfiles-1.1.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:891c69e027748b4a73847335d208e374ce54ca3c335907d381fde4e41661b13b", size = 626243, upload-time = "2025-06-15T19:06:16.232Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/65/6e12c042f1a68c556802a84d54bb06d35577c81e29fba14019562479159c/watchfiles-1.1.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:12fe8eaffaf0faa7906895b4f8bb88264035b3f0243275e0bf24af0436b27259", size = 623073, upload-time = "2025-06-15T19:06:17.457Z" }, + { url = "https://files.pythonhosted.org/packages/89/ab/7f79d9bf57329e7cbb0a6fd4c7bd7d0cee1e4a8ef0041459f5409da3506c/watchfiles-1.1.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:bfe3c517c283e484843cb2e357dd57ba009cff351edf45fb455b5fbd1f45b15f", size = 400872, upload-time = "2025-06-15T19:06:18.57Z" }, + { url = "https://files.pythonhosted.org/packages/df/d5/3f7bf9912798e9e6c516094db6b8932df53b223660c781ee37607030b6d3/watchfiles-1.1.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a9ccbf1f129480ed3044f540c0fdbc4ee556f7175e5ab40fe077ff6baf286d4e", size = 392877, upload-time = "2025-06-15T19:06:19.55Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c5/54ec7601a2798604e01c75294770dbee8150e81c6e471445d7601610b495/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba0e3255b0396cac3cc7bbace76404dd72b5438bf0d8e7cefa2f79a7f3649caa", size = 449645, upload-time = "2025-06-15T19:06:20.66Z" }, + { url = "https://files.pythonhosted.org/packages/0a/04/c2f44afc3b2fce21ca0b7802cbd37ed90a29874f96069ed30a36dfe57c2b/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4281cd9fce9fc0a9dbf0fc1217f39bf9cf2b4d315d9626ef1d4e87b84699e7e8", size = 457424, upload-time = "2025-06-15T19:06:21.712Z" }, + { url = "https://files.pythonhosted.org/packages/9f/b0/eec32cb6c14d248095261a04f290636da3df3119d4040ef91a4a50b29fa5/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6d2404af8db1329f9a3c9b79ff63e0ae7131986446901582067d9304ae8aaf7f", size = 481584, upload-time = "2025-06-15T19:06:22.777Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/e2/ca4bb71c68a937d7145aa25709e4f5d68eb7698a25ce266e84b55d591bbd/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e78b6ed8165996013165eeabd875c5dfc19d41b54f94b40e9fff0eb3193e5e8e", size = 596675, upload-time = "2025-06-15T19:06:24.226Z" }, + { url = "https://files.pythonhosted.org/packages/a1/dd/b0e4b7fb5acf783816bc950180a6cd7c6c1d2cf7e9372c0ea634e722712b/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:249590eb75ccc117f488e2fabd1bfa33c580e24b96f00658ad88e38844a040bb", size = 477363, upload-time = "2025-06-15T19:06:25.42Z" }, + { url = "https://files.pythonhosted.org/packages/69/c4/088825b75489cb5b6a761a4542645718893d395d8c530b38734f19da44d2/watchfiles-1.1.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d05686b5487cfa2e2c28ff1aa370ea3e6c5accfe6435944ddea1e10d93872147", size = 452240, upload-time = "2025-06-15T19:06:26.552Z" }, + { url = "https://files.pythonhosted.org/packages/10/8c/22b074814970eeef43b7c44df98c3e9667c1f7bf5b83e0ff0201b0bd43f9/watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:d0e10e6f8f6dc5762adee7dece33b722282e1f59aa6a55da5d493a97282fedd8", size = 625607, upload-time = "2025-06-15T19:06:27.606Z" }, + { url = "https://files.pythonhosted.org/packages/32/fa/a4f5c2046385492b2273213ef815bf71a0d4c1943b784fb904e184e30201/watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:af06c863f152005c7592df1d6a7009c836a247c9d8adb78fef8575a5a98699db", size = 623315, upload-time = "2025-06-15T19:06:29.076Z" }, ] [[package]] @@ -5214,16 +5255,16 @@ wheels = [ [[package]] name = "webdataset" -version = "0.2.111" +version = "1.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "braceexpand" }, { name = "numpy" }, { name = "pyyaml" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/dc/13/27b4a05a01bcf96e451f624d36d3637101e92b25970295546f7d949b38e9/webdataset-0.2.111.tar.gz", hash = "sha256:5b2835386a25601307a9ded9bcc0dbd1e81a9eee017784152528e77dd8619511", size = 79970, upload-time = "2025-02-12T20:12:15.577Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/e1/c1140ab6533668930895512ac5cbf07972fa41ebab275f5f5cdd432bc3c7/webdataset-0.2.111-py3-none-any.whl", hash = "sha256:57a70eb5d7029303ce2262d900ee3f16443bb5e9cf25f634775ce972859bcee4", size = 85514, upload-time = "2025-02-12T20:12:12.926Z" }, + { url = "https://files.pythonhosted.org/packages/d9/00/aca6beb3658dab4ed3dbb41a78e6e7f31342e0b41d28088f205525751601/webdataset-1.0.2-py3-none-any.whl", hash = "sha256:3dbfced32b25c0d199c6b9787937b6f85742bc3c84f652c846893075c1c082d9", size = 74956, upload-time = "2025-06-19T23:26:20.354Z" }, ] [[package]] @@ -5322,8 +5363,8 @@ name = "xformers" version = "0.0.30" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, - { name = "torch", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "numpy", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "torch", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/bf/f7/dd2269cce89fd1221947dd7cc3a60707ffe721ef55c1803ac3b1a1f7ae5c/xformers-0.0.30.tar.gz", hash = "sha256:a12bf3eb39e294cdbe8a7253ac9b665f41bac61d6d98df174e34ef7bdb6f2fc4", size = 10214139, upload-time = "2025-04-28T20:51:02.045Z" } wheels = [ @@ -5335,12 +5376,12 @@ name = "xgrammar" version = "0.1.19" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ninja" }, - { name = "pydantic" }, - { name = "sentencepiece" }, - { name = "tiktoken" }, - { name = "torch" }, - { name = "transformers" }, + { name = "ninja", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, + { name = "pydantic", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, + { name = "sentencepiece", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, + { name = "tiktoken", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, + { name = "torch", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, + { name = "transformers", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b5/55/73e1e4f918ade656c4fa7f3a5fcfb3d521a429fe305d2cb8ca58bfb201d4/xgrammar-0.1.19.tar.gz", hash = "sha256:75bf3e814283b1cbaee9252234c5d4081f0058d29b26d8984f1cdf031c99b775", size = 1714056, upload-time = "2025-05-08T07:13:46.05Z" } From bb614d493b9ac60c7e3901ab71faa94491943bba Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Tue, 8 Jul 2025 11:01:28 -0700 Subject: [PATCH 13/59] fix: Fix crash for logprob error plot (#623) Signed-off-by: Yi-Fu Wu Signed-off-by: Jialei Chen --- nemo_rl/utils/logger.py | 6 ++++++ 1 file changed, 6 
insertions(+) diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py index b99ebcc858..0a83204ee6 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -757,6 +757,12 @@ def log_plot_token_mult_prob_error( data["full_lengths"][sample_idx] - 1, ) + if generation_start_idx >= generation_end_idx: + print( + f"Skipping token_mult_prob_error plot because generation_start_idx ({generation_start_idx}) >= generation_end_idx ({generation_end_idx})" + ) + return + generation_logprob = generation_logprobs[ sample_idx, generation_start_idx:generation_end_idx ] From fe8ada33a329116a6a7ab561119c2f46edfc13be Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Wed, 9 Jul 2025 02:06:59 +0800 Subject: [PATCH 14/59] refactor: remove fsdp1 path (#614) Signed-off-by: Yuki Huang Signed-off-by: Jialei Chen --- docs/model-quirks.md | 2 +- ...ma3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml | 92 -- ...rpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml | 119 -- ...ft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml | 72 -- nemo_rl/algorithms/grpo.py | 10 +- .../ray_actor_environment_registry.py | 1 - nemo_rl/models/policy/fsdp1_policy_worker.py | 1072 ----------------- nemo_rl/models/policy/lm_policy.py | 35 +- ...lama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh | 42 - .../grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh | 40 - .../sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh | 41 - tests/test_suites/nightly.txt | 9 +- tests/unit/experience/test_rollouts.py | 145 --- .../models/generation/test_vllm_generation.py | 132 +- tests/unit/models/policy/test_fsdp1_worker.py | 879 -------------- tests/unit/utils/test_native_checkpoint.py | 2 +- 16 files changed, 83 insertions(+), 2610 deletions(-) delete mode 100644 examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml delete mode 100644 examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml delete mode 100644 examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml 
delete mode 100644 nemo_rl/models/policy/fsdp1_policy_worker.py delete mode 100755 tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh delete mode 100755 tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh delete mode 100755 tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh delete mode 100644 tests/unit/models/policy/test_fsdp1_worker.py diff --git a/docs/model-quirks.md b/docs/model-quirks.md index ca08b2741b..7824e8bf78 100644 --- a/docs/model-quirks.md +++ b/docs/model-quirks.md @@ -6,7 +6,7 @@ This document outlines special cases and model-specific behaviors that require c ### Tied Weights -Weight tying between the embedding layer (`model.embed_tokens`) and output layer (`lm_head`) is currently not respected when using the FSDP1 policy or the DTensor policy when TP > 1 (See [this issue](https://github.com/NVIDIA-NeMo/RL/issues/227)). To avoid errors when training these models, we only allow training models with tied weights using the DTensor policy with TP=1. For Llama-3 and Qwen2.5 models, weight-tying is only enabled for the smaller models (< 2B), which can typically be trained without tensor parallelism. For Gemma-3, all model sizes have weight-tying enabled, including the larger models which require tensor parallelism. To support training of these models, we specially handle the Gemma-3 models by allowing training using the DTensor policy with TP > 1. +Weight tying between the embedding layer (`model.embed_tokens`) and output layer (`lm_head`) is currently not respected when using the DTensor policy when TP > 1 (See [this issue](https://github.com/NVIDIA-NeMo/RL/issues/227)). To avoid errors when training these models, we only allow training models with tied weights using the DTensor policy with TP=1. For Llama-3 and Qwen2.5 models, weight-tying is only enabled for the smaller models (< 2B), which can typically be trained without tensor parallelism. 
For Gemma-3, all model sizes have weight-tying enabled, including the larger models which require tensor parallelism. To support training of these models, we specially handle the Gemma-3 models by allowing training using the DTensor policy with TP > 1. **Special Handling:** - We skip the tied weights check for all Gemma-3 models when using the DTensor policy, allowing training using TP > 1. diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml deleted file mode 100644 index c382763562..0000000000 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.yaml +++ /dev/null @@ -1,92 +0,0 @@ -dpo: - max_num_epochs: 2 - max_num_steps: 20 - val_period: 50 - val_batches: 16 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: false - seed: 42 - - reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 - sft_loss_weight: 0.01 - -checkpointing: - enabled: false - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 - save_period: 10000 - -policy: - model_name: "meta-llama/Llama-3.1-8B-Instruct" - tokenizer: - name: ${policy.model_name} - train_global_batch_size: 256 - train_micro_batch_size: 1 - max_total_sequence_length: 2048 - precision: "bfloat16" - fsdp_offload_enabled: false - activation_checkpointing_enabled: false - - dtensor_cfg: - enabled: false - cpu_offload: False - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: False - - make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 5.0e-6 - weight_decay: 0.1 - betas: [0.9, 0.98] - eps: 1e-8 - 
foreach: False - fused: False - - scheduler: - - name: "torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.000000001 - end_factor: 1.0 - total_iters: 1 - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - milestones: [1] - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} - -logger: - log_dir: "logs" - wandb_enabled: true - tensorboard_enabled: true - monitor_gpus: true - wandb: - project: nemo-rl - name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml deleted file mode 100644 index fe2de660ce..0000000000 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.yaml +++ /dev/null @@ -1,119 +0,0 @@ -grpo: - num_prompts_per_step: 64 - num_generations_per_prompt: 32 - max_rollout_turns: 1 - max_num_steps: 30 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true -checkpointing: - enabled: true - checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 -policy: - model_name: Qwen/Qwen2.5-7B-Instruct - tokenizer: - name: Qwen/Qwen2.5-7B-Instruct - train_global_batch_size: 512 - train_micro_batch_size: 1 - generation_batch_size: 32 - logprob_batch_size: 2 - max_total_sequence_length: 4096 - precision: bfloat16 - fsdp_offload_enabled: false - activation_checkpointing_enabled: false - 
dtensor_cfg: - enabled: false - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: False - make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 13 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 13 - generation: - backend: vllm - max_new_tokens: 4096 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 151645 - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: 4096 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null -data: - max_input_seq_length: 4096 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 -logger: - log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp1 - num_val_samples_to_print: 0 - wandb_enabled: true - tensorboard_enabled: true - monitor_gpus: true - wandb: - project: nemo-rl - name: grpo-qwen2.5-7b-instruct-4n8g-fsdp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml deleted file mode 100644 index 5a27f9b6b9..0000000000 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.yaml +++ /dev/null @@ -1,72 +0,0 @@ -sft: - max_num_epochs: 1 - 
max_num_steps: 250 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: true - checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp1 - metric_name: val_loss - higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: meta-llama/Llama-3.1-8B-Instruct - tokenizer: - name: meta-llama/Llama-3.1-8B-Instruct - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 1 - max_total_sequence_length: 1024 - precision: bfloat16 - fsdp_offload_enabled: false - activation_checkpointing_enabled: false - dtensor_cfg: - enabled: false - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: False - make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.1 - betas: - - 0.9 - - 0.98 - eps: 1e-05 - foreach: false - fused: false -data: - max_input_seq_length: 1024 - dataset_name: squad - add_bos: true - add_eos: true - add_generation_prompt: false -logger: - log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 - wandb_enabled: true - tensorboard_enabled: true - monitor_gpus: true - wandb: - project: nemo-rl - name: sft-llama3.1-8b-instruct-1n8g-fsdp1 - tensorboard: - log_dir: tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 3bcad52849..29b08f7bdb 100644 
--- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -230,7 +230,7 @@ def setup( use_gpus=True, num_gpus_per_node=cluster_config["gpus_per_node"], max_colocated_worker_groups=1 - if generation_config["backend"] in ("hf", "megatron") + if generation_config["backend"] == "megatron" else 2, ) train_cluster = cluster @@ -238,8 +238,8 @@ def setup( print(f" ✓ Ray cluster initialized with {cluster_config['num_nodes']} nodes") else: - assert generation_config["backend"] not in ("hf", "megatron"), ( - "Non-colocated inference is not supported for either the HF or Megatron generation backends. " + assert generation_config["backend"] != "megatron", ( + "Non-colocated inference is not supported for Megatron generation backends. " "Please use vLLM backend for generation." ) @@ -315,7 +315,7 @@ def setup( backend = generation_config["backend"] generation_config["model_name"] = policy_config["model_name"] # Needed for vLLM - if backend in ("hf", "megatron"): + if backend == "megatron": policy_generation = None print( f" ✓ Using {backend} backend for generation with {policy_config['model_name']}" @@ -484,7 +484,7 @@ def grpo_train( """Run GRPO training algorithm.""" timer = Timer() NEED_REFIT = True - # If policy_generation is None, use the policy as the generation interface (hf framework backend) + # If policy_generation is None, use the policy as the generation interface (megatron framework backend) if policy_generation is None: policy_generation = policy # type: ignore NEED_REFIT = False diff --git a/nemo_rl/distributed/ray_actor_environment_registry.py b/nemo_rl/distributed/ray_actor_environment_registry.py index b685cde801..9464569c32 100644 --- a/nemo_rl/distributed/ray_actor_environment_registry.py +++ b/nemo_rl/distributed/ray_actor_environment_registry.py @@ -19,7 +19,6 @@ # Temporary workaround for the coupled implementation of DTensorPolicyWorker and vLLM. 
# This will be reverted to PY_EXECUTABLES.BASE once https://github.com/NVIDIA-NeMo/RL/issues/501 is resolved. "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker": PY_EXECUTABLES.VLLM, - "nemo_rl.models.policy.fsdp1_policy_worker.FSDP1PolicyWorker": PY_EXECUTABLES.BASE, "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker": PY_EXECUTABLES.MCORE, "nemo_rl.environments.math_environment.MathEnvironment": PY_EXECUTABLES.SYSTEM, "nemo_rl.environments.games.sliding_puzzle.SlidingPuzzleEnv": PY_EXECUTABLES.SYSTEM, diff --git a/nemo_rl/models/policy/fsdp1_policy_worker.py b/nemo_rl/models/policy/fsdp1_policy_worker.py deleted file mode 100644 index 418f280e46..0000000000 --- a/nemo_rl/models/policy/fsdp1_policy_worker.py +++ /dev/null @@ -1,1072 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import gc -import os -import warnings -from collections import defaultdict -from contextlib import AbstractContextManager, contextmanager, nullcontext -from typing import Any, Generator, Optional, cast - -import ray -import torch -from torch.distributed.device_mesh import init_device_mesh -from torch.distributed.fsdp import ( - CPUOffload, - FullyShardedDataParallel, - MixedPrecision, -) -from torch.distributed.fsdp.api import ShardedStateDictConfig, StateDictType -from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy -from transformers import AutoModelForCausalLM, PreTrainedTokenizerBase -from transformers.integrations.accelerate import find_tied_parameters - -from nemo_rl.algorithms.interfaces import LossFunction, LossType -from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.models.generation.interfaces import ( - GenerationDatumSpec, - GenerationOutputSpec, - verify_right_padding, -) -from nemo_rl.models.policy import PolicyConfig -from nemo_rl.models.policy.interfaces import ( - LogprobOutputSpec, - ReferenceLogprobOutputSpec, -) -from nemo_rl.models.policy.utils import ( - get_gpu_info, - import_class_from_path, - sliding_window_overwrite, -) -from nemo_rl.utils.native_checkpoint import ( - load_checkpoint, - save_checkpoint, -) - - -@ray.remote -class FSDP1PolicyWorker: - def __repr__(self) -> str: - """Customizes the actor's prefix in the Ray logs. - - This makes it easier to identify which worker is producing specific log messages. - """ - if torch.distributed.is_initialized(): - return f"{self.__class__.__name__}[rank={torch.distributed.get_rank()}]" - else: - return f"{self.__class__.__name__}" - - def __init__( - self, - config: PolicyConfig, - tokenizer: PreTrainedTokenizerBase, - weights_path: Optional[str] = None, - optimizer_path: Optional[str] = None, - init_optimizer: bool = True, - init_reference_model: bool = True, - **kwargs: Any, - ): - self.cfg = config - # torch distributed init. 
Envars for rank, world_size, and master_addr and master_port are set from the ray remote call - torch.distributed.init_process_group(backend="nccl") - rank = torch.distributed.get_rank() - world_size = torch.distributed.get_world_size() - model_name = self.cfg["model_name"] - if self.cfg["precision"] == "float32": - self.dtype = torch.float32 - elif self.cfg["precision"] == "bfloat16": - self.dtype = torch.bfloat16 - else: - raise ValueError(f"Unknown precision: {self.cfg['precision']}") - - print(f"[Rank {rank}] Loading model {model_name} on CPU...") - self.model = AutoModelForCausalLM.from_pretrained( - model_name, - device_map="cpu", # load weights onto CPU initially - # Always load the model in float32 to keep master weights in float32. - # Keeping the master weights in lower precision has shown to cause issues with convergence. - # https://github.com/NVIDIA-NeMo/RL/issues/279 will fix the issue of CPU OOM for larger models. - torch_dtype=torch.float32, - trust_remote_code=True, - **sliding_window_overwrite( - model_name - ), # due to https://github.com/huggingface/transformers/issues/38002 - ) - # caching since this property is not always preserved after FSDP - self.num_tied_weights = len(find_tied_parameters(self.model)) - - if init_reference_model: - self.reference_model = AutoModelForCausalLM.from_pretrained( - model_name, - device_map="cpu", # load weights onto CPU initially - torch_dtype=torch.float32, # use full precision in sft until https://github.com/NVIDIA-NeMo/RL/issues/13 is fixed - trust_remote_code=True, - **sliding_window_overwrite( - model_name - ), # due to https://github.com/huggingface/transformers/issues/38002 - ) - else: - self.reference_model = None - - self.tokenizer = tokenizer - - # ------------------------------------------------ - # 3) Move to GPU + Composable FSDP - # (Initialize device mesh, shard submodules, then shard entire model) - # ------------------------------------------------ - - def do_fsdp(model: torch.nn.Module) -> 
torch.nn.Module: - if world_size == 1: - print( - "[INFO] Using a single GPU - skipping FSDP wrapper to avoid GPU memory offloading issues" - ) - return model - - # Create a device mesh with 'world_size' GPUs in a 1D arrangement. - mesh = init_device_mesh("cuda", (world_size,)) - mp_policy = MixedPrecision( - param_dtype=self.dtype, - reduce_dtype=torch.float32, - buffer_dtype=torch.float32, - ) - - cpu_offload = ( - CPUOffload(offload_params=True) - if self.cfg["fsdp_offload_enabled"] - else None - ) - - return FullyShardedDataParallel( - model, - device_mesh=mesh, - auto_wrap_policy=size_based_auto_wrap_policy, - mixed_precision=mp_policy, - cpu_offload=cpu_offload, - ) - - self.model.to("cuda") - if self.cfg["activation_checkpointing_enabled"]: - self.model.gradient_checkpointing_enable( - gradient_checkpointing_kwargs={"use_reentrant": False} - ) - self.model = do_fsdp(self.model) - self.model = self.manual_offload_to_cpu(self.model) - if self.reference_model is not None: - self.reference_model.to("cuda") - self.reference_model = do_fsdp(self.reference_model) - self.reference_model = self.manual_offload_to_cpu(self.reference_model) - self.model = self.manual_load_to_gpu(self.model) - - # used for streaming update inference engine weights - self._held_sharded_state_dict_reference: Optional[dict[str, Any]] = None - self._held_streamed_param_reference: Optional[dict[str, Any]] = None - - # register_fsdp_forward_method(self.model, "generate") - if init_optimizer: - optimizer_cls = import_class_from_path(self.cfg["optimizer"]["name"]) - self.optimizer = optimizer_cls( - self.model.parameters(), **self.cfg["optimizer"]["kwargs"] - ) - else: - self.optimizer = None - - if "scheduler" in self.cfg and self.optimizer is not None: - if isinstance(self.cfg["scheduler"], dict): - scheduler_cls = import_class_from_path( - cast(str, self.cfg["scheduler"]["name"]) - ) - self.scheduler = scheduler_cls( - self.optimizer, **self.cfg["scheduler"]["kwargs"] - ) - else: - schedulers 
= [] - for scheduler_cfg in self.cfg["scheduler"]: - if "name" in scheduler_cfg: - schedulers.append( - import_class_from_path(scheduler_cfg["name"])( - self.optimizer, **scheduler_cfg["kwargs"] - ) - ) - else: - assert "milestones" in scheduler_cfg, ( - "unknown scheduler config: ", - scheduler_cfg, - ) - milestones: list[int] = scheduler_cfg["milestones"] - - self.scheduler = torch.optim.lr_scheduler.SequentialLR( - self.optimizer, schedulers, milestones - ) - - elif self.optimizer is not None: - ## default to a passthrough LR schedule - self.scheduler = torch.optim.lr_scheduler.LambdaLR( - self.optimizer, lr_lambda=lambda epoch: 1 - ) - - # restore - if weights_path: - self.load_checkpoint( - weights_path, - optimizer_path, - ) - else: - print( - "No weights path provided. Starting from scratch (default policy init)" - ) - - def is_alive(self) -> bool: - return True - - def reset_peak_memory_stats(self) -> None: - torch.cuda.reset_peak_memory_stats() - - def get_gpu_info(self) -> dict[str, Any]: - """Return information about the GPU being used by this worker.""" - return get_gpu_info(self.model) - - def train( - self, - data: BatchedDataDict[Any], - loss_fn: LossFunction, - eval_mode: bool = False, - gbs: Optional[int] = None, - mbs: Optional[int] = None, - ) -> dict[str, Any]: - """Train the policy on a batch of data with a given loss function.""" - # Check if the model has tied weights - skip_tie_check = os.environ.get("NRL_SKIP_TIED_WEIGHT_CHECK") - if self.num_tied_weights != 0 and not skip_tie_check: - raise ValueError( - f"Using FSP1 with a model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={self.num_tied_weights}) is not supported (https://github.com/NVIDIA-NeMo/RL/issues/227). Please use dtensor policy with tensor parallel == 1 instead." 
- ) - - if gbs is None: - gbs = self.cfg["train_global_batch_size"] - if mbs is None: - mbs = self.cfg["train_micro_batch_size"] - local_gbs = gbs // torch.distributed.get_world_size() - dataset_size = data["input_ids"].shape[0] - num_global_batches = dataset_size // local_gbs - - if eval_mode: - ctx: AbstractContextManager = torch.no_grad() - self.model.eval() - else: - ctx = nullcontext() - # Ensure model is in training mode - self.model.train() - - with ctx: - # Get data from batch and move to device - data.to("cuda") - - losses = [] - all_mb_metrics = [] - for gb_start in range(0, dataset_size, local_gbs): - global_batch: BatchedDataDict[Any] = data.slice( - gb_start, gb_start + local_gbs - ) - - assert "sample_mask" in global_batch, ( - "sample_mask must be present in the data!" - ) - ## get the normalization factor for the loss - local_valid_seqs = torch.sum(global_batch["sample_mask"]) - - if not "token_mask" in global_batch: - local_valid_toks = ( - local_valid_seqs * global_batch["input_ids"].shape[1] - ) - else: - local_valid_toks = torch.sum( - global_batch["token_mask"][:, 1:] - * global_batch["sample_mask"].unsqueeze(-1) - ) - - to_reduce = torch.tensor([local_valid_seqs, local_valid_toks]).cuda() - torch.distributed.all_reduce(to_reduce) - global_valid_seqs, global_valid_toks = to_reduce[0], to_reduce[1] - - if ( - hasattr(loss_fn, "loss_type") - and loss_fn.loss_type == LossType.TOKEN_LEVEL - ): - assert "token_mask" in global_batch, ( - "token_mask must be present in the data when using token-level loss" - ) - - self.optimizer.zero_grad() - mb_losses = [] - - # Calculate number of microbatches to process - # make_microbatch_iterator assumes that the batch size is a multiple of the microbatch size - # so its safe to not check for the case where the last data slice is smaller than mbs - num_microbatches = min(local_gbs, dataset_size - gb_start) // mbs - - for mb in global_batch.make_microbatch_iterator(mbs): - input_ids = mb["input_ids"] - - 
input_lengths = mb["input_lengths"] - batch_size, seq_len = input_ids.shape - attention_mask = torch.ones( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device - ) - for i, length in enumerate(input_lengths): - # For right-padded sequence, set 1s at the beginning of the sequence - attention_mask[i, :length] = 1 - - with torch.autocast(device_type="cuda", dtype=self.dtype): - outputs = self.model( - input_ids=input_ids, - attention_mask=attention_mask, - use_cache=False, - ) - # Get logprobs - if not hasattr(outputs, "logits"): - logits = self.model.lm_head(outputs.last_hidden_state) - else: - logits = outputs.logits - - # Divide logits by temperature - if "generation" in self.cfg and self.cfg["generation"] is not None: - logits.div_(self.cfg["generation"]["temperature"]) - - loss, loss_metrics = loss_fn( - logits, mb, global_valid_seqs, global_valid_toks - ) - ## scale by the number of global batches so we get the correct - ## value when summing metrics across all microbatches - for k in loss_metrics.keys(): - loss_metrics[k] /= num_global_batches - num_valid_samples = loss_metrics["num_valid_samples"] - loss_metrics["lr"] = self.optimizer.param_groups[0]["lr"] - loss_metrics["global_valid_seqs"] = global_valid_seqs.item() - loss_metrics["global_valid_toks"] = global_valid_toks.item() - - # Backward pass - if not eval_mode: - ## NOTE: invalid samples should be multiplied - ## by zero in the loss function to prevent them - ## from affecting the gradient calculation - - # when FSDP reduces the gradients over the DP dim, they're automatically averaged - # but we want to sum them so we cancel out the average here - loss *= torch.distributed.get_world_size() - loss.backward() - if num_valid_samples > 0: - mb_losses.append(loss.item()) - all_mb_metrics.append(loss_metrics) - - # Clip gradients - if not eval_mode: - if self.cfg["max_grad_norm"] is None: - max_grad_norm = 9999999999.0 - else: - max_grad_norm = self.cfg["max_grad_norm"] - - if 
isinstance(self.model, FullyShardedDataParallel): - # when using FSDP1, use FSDP's clip_grad_norm_ - # to ensure grad norm is being computed over all parameters - # see https://pytorch.org/docs/stable/fsdp.html#torch.distributed.fsdp.FullyShardedDataParallel.clip_grad_norm_ - grad_norm = self.model.clip_grad_norm_(max_norm=max_grad_norm) - else: - grad_norm = torch.nn.utils.clip_grad_norm_( - self.model.parameters(), max_norm=max_grad_norm - ) - grad_norm = grad_norm.cpu() - - # Update parameters - self.optimizer.step() - else: - grad_norm = None - losses.append(torch.tensor(mb_losses).sum().item()) - - # increment scheduler after all batches in rollout are processed - if not eval_mode: - self.scheduler.step() - - # Compute global loss across all ranks - with torch.no_grad(): - global_loss = torch.tensor(losses, device="cuda") - torch.distributed.all_reduce(global_loss) - - # Aggregate metrics across all microbatches - mb_metrics = defaultdict(list) - for m in all_mb_metrics: - for k, v in m.items(): - mb_metrics[k].append(v) - - metrics = { - "global_loss": global_loss.cpu(), - "grad_norm": grad_norm, - "rank": torch.distributed.get_rank(), - "all_mb_metrics": dict(mb_metrics), - } - - return metrics - - def get_logprobs( - self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None - ) -> BatchedDataDict[LogprobOutputSpec]: - """Get the logprobs of the model for a batch of data. - - If no micro-batch size is provided, uses the configured logprob_batch_size to do microbatching. - - Input data is assumed to be right-padded. The method internally converts to - left-padded format for computation, and returns outputs in right-padded format. - - Returns: - a BatchedDataDict with key "logprobs" and shape [batch_size, sequence_length]. - We use the convention that the logprob of the first token is 0 so that the sequence length is maintained. - The logprob of input token i is specified at position i in the output logprobs tensor. 
- """ - logprob_batch_size = ( - micro_batch_size - if micro_batch_size is not None - else self.cfg["logprob_batch_size"] - ) - all_log_probs = [] - self.model.eval() - - # Process in batches - with torch.no_grad(): - data.to("cuda") - for lp_batch in data.make_microbatch_iterator(logprob_batch_size): - input_ids = lp_batch["input_ids"] - batch_size, seq_len = input_ids.shape - - # Create attention mask - input_lengths = lp_batch["input_lengths"] - - # Create attention mask for right-padded data - attention_mask = torch.zeros( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device - ) - for i, length in enumerate(input_lengths): - # For right-padded sequence, set 1s at the beginning of the sequence - attention_mask[i, :length] = 1 - - # Process with the model directly using right-padded inputs - with torch.autocast(device_type="cuda", dtype=self.dtype): - outputs = self.model( - input_ids=input_ids, - attention_mask=attention_mask, - use_cache=False, - ) - log_probs = torch.nn.functional.log_softmax( - outputs.logits.to(torch.float32), dim=-1 - ) - - # Extract logprobs for each token in the sequence by gathering the logprob - # corresponding to the next token at each position - # Input shapes: - # log_probs: [batch_size, sequence_length, vocab_size] - logits for each position - # token_ids: [batch_size, sequence_length] - actual tokens - # Output shape: [batch_size, sequence_length] - logprob of each token given previous - # We get logprob of token[t+1] from logits[t], prepending 0 to maintain sequence length - token_ids = input_ids - next_tokens = token_ids[:, 1:] # Skip first token - log_probs = log_probs[:, :-1] # Remove last position's logits - token_logprobs = log_probs.gather( - dim=-1, index=next_tokens.unsqueeze(-1) - ).squeeze(-1) - - # Prepend 0 logprob for first token to maintain same sequence length as input - token_logprobs = torch.cat( - [torch.zeros_like(token_logprobs[:, :1]), token_logprobs], dim=1 - ) - - # Apply mask to zero out 
padding tokens logprobs - token_logprobs = token_logprobs * attention_mask - all_log_probs.append(token_logprobs) - - # Concatenate all batches - return_data = BatchedDataDict[LogprobOutputSpec]() - return_data["logprobs"] = torch.cat(all_log_probs, dim=0).cpu() - - return return_data - - @contextmanager - def use_reference_model(self) -> Generator[None, None, None]: - """Context manager that temporarily swaps the reference model and active model. - - On entry: Moves model to CPU, moves reference_model to CUDA. Swaps the references - On exit: Restores original references and re-flips cuda/cpu - - """ - try: - # Save original references - original_model = self.model - original_reference_model = self.reference_model - - self.model = self.manual_offload_to_cpu(self.model) - self.reference_model = self.manual_load_to_gpu(self.reference_model) - - # Swap the references - self.model, self.reference_model = self.reference_model, self.model - gc.collect() - torch.cuda.empty_cache() - - # - self.model is the original reference_model, now on CUDA - # - self.reference_model is the original model, now on CPU - yield - - finally: - # Restore original references and device placement - self.reference_model = self.manual_offload_to_cpu(original_reference_model) - self.model = self.manual_load_to_gpu(original_model) - gc.collect() - torch.cuda.empty_cache() - - def get_reference_policy_logprobs( - self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None - ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: - """Get the logprobs from the reference policy for a batch of data. - - Returns: - a BatchedDataDict with key "reference_logprobs" and shape [batch_size, sequence_length]. - We use the convention that the logprob of the first token is 0 so that the sequence length is maintained. - The logprob of input token i is specified at position i in the output logprobs tensor. 
- """ - with self.use_reference_model(): - reference_logprobs = self.get_logprobs(data, micro_batch_size) - - return_data = BatchedDataDict[ReferenceLogprobOutputSpec]() - return_data["reference_logprobs"] = reference_logprobs["logprobs"].cpu() - return return_data - - def generate( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> BatchedDataDict[GenerationOutputSpec]: - """Generate a batch of data using huggingface framework generation. - - Args: - data: BatchedDataDict containing input_ids and input_lengths tensors - - Returns: - BatchedDataDict conforming to GenerationOutputSpec: - - output_ids: input + generated token IDs - - logprobs: Log probabilities for each token - - generation_lengths: Lengths of each response - """ - # Verify input is right padded - assert isinstance(data, BatchedDataDict), ( - f"data must be a BatchedDataDict, got type: {type(data)}" - ) - assert "input_ids" in data and "input_lengths" in data, ( - f"input_ids and input_lengths must be present in the BatchedDataDict, got keys: {data.keys()}" - ) - is_right_padded, error_msg = verify_right_padding( - data, pad_value=self.tokenizer.pad_token_id - ) - if not is_right_padded: - warnings.warn( - f"Input to vLLM worker is not properly right-padded: {error_msg}" - ) - - self.model.eval() - - # Right padded tokens are converted to left padded tokens for HF generate (https://huggingface.co/docs/transformers/main/en/llm_tutorial?padding=right+pad#padding-side) - with torch.distributed.fsdp.FullyShardedDataParallel.summon_full_params( - self.model, recurse=False - ): - # Get generation config from self.cfg - generation_batch_size = self.cfg["generation_batch_size"] - assert self.cfg["generation"] is not None, ( - "Generation config is not set while trying to generate" - ) - gen_cfg = self.cfg["generation"] - - micro_batches = [] - - # Process in batches - max_length = 0 - for gen_batch in data.make_microbatch_iterator(generation_batch_size): - # Create attention mask 
from input_lengths if needed for the model - input_ids = gen_batch["input_ids"].cuda() - input_lengths = gen_batch["input_lengths"].cuda() - batch_size, seq_len = input_ids.shape - - # Convert right padding to left padding - left_padded_input_ids = torch.full_like( - input_ids, gen_cfg["pad_token_id"] - ) - left_padded_attention_mask = torch.zeros( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device - ) - - for i, length in enumerate(input_lengths): - # Move tokens to the end of the sequence (left padding) - left_padded_input_ids[i, seq_len - length :] = input_ids[i, :length] - # Set attention mask for the actual tokens (at the end for left padding) - left_padded_attention_mask[i, seq_len - length :] = 1 - - # this function requires all generations have the same stop strings, so we collect all here - batch_stop_strings: list[list[str]] = gen_batch.get("stop_strings", []) - stop_strings = set() - for sample_stop_strings in batch_stop_strings: - if sample_stop_strings: - stop_strings.update(sample_stop_strings) - - # Add default stop strings from config - if gen_cfg.get("stop_strings", None): - stop_strings.update(gen_cfg["stop_strings"]) - - stop_strings: list[str] | None = ( - list(stop_strings) if len(stop_strings) > 0 else None - ) - - if isinstance( - self.model, torch.distributed.fsdp.FullyShardedDataParallel - ): - generation_module = self.model.module - else: - generation_module = self.model - outputs = generation_module.generate( # type: ignore # we know it's a nn.Module - input_ids=left_padded_input_ids, - attention_mask=left_padded_attention_mask, - max_new_tokens=gen_cfg["max_new_tokens"], - do_sample=not greedy, - temperature=gen_cfg["temperature"], - top_p=gen_cfg["top_p"], - top_k=gen_cfg["top_k"], - pad_token_id=gen_cfg["pad_token_id"], - eos_token_id=gen_cfg["stop_token_ids"], - stop_strings=stop_strings, - tokenizer=self.tokenizer, # needs for stop_strings - return_dict_in_generate=True, - output_scores=True, - synced_gpus=True, - ) - 
# Get the generated sequences - max_length = max(max_length, outputs.sequences.size(1)) - - # Convert scores to log probabilities and extract the logprob of the chosen token - scores = torch.stack( - outputs.scores, dim=1 - ) # [batch_size, seq_len, vocab_size] - logprobs = torch.nn.functional.log_softmax(scores, dim=-1) - - # Get the logprobs of the actually generated tokens - # outputs.sequences[:, -scores.size(1):] gives us just the newly generated tokens - generated_tokens = outputs.sequences[:, -scores.size(1) :] - token_logprobs = logprobs.gather( - dim=-1, index=generated_tokens.unsqueeze(-1) - ).squeeze(-1) - - # Prepend zeros for input tokens based on original input lengths, not the padded length - mb = {} - mb["orig_input_lengths"] = input_lengths.clone() - mb["generation_logprobs"] = token_logprobs - mb["left_padded_output_ids"] = outputs.sequences - - micro_batches.append(mb) - - # Get lengths, pad, and concatenate all batches - return_data: BatchedDataDict[GenerationOutputSpec] = ( - BatchedDataDict.from_batches( - micro_batches, - pad_value_dict={ - "left_padded_output_ids": self.cfg["generation"]["pad_token_id"] - }, - ) - ) - - # Calculate the lengths of generations for each sequence by finding stop tokens - generation_lengths = [] - unpadded_sequence_lengths = [] - input_length = data["input_ids"].size(1) - - # Convert left-padded outputs back to right-padded format - batch_size = len(return_data["left_padded_output_ids"]) - max_seq_len = max( - [seq.size(0) for seq in return_data["left_padded_output_ids"]] - ) - right_padded_output_ids = torch.full( - (batch_size, max_seq_len), - self.cfg["generation"]["pad_token_id"], - dtype=return_data["left_padded_output_ids"][0].dtype, - device=return_data["left_padded_output_ids"][0].device, - ) - - for idx, seq in enumerate(return_data["left_padded_output_ids"]): - # Get only the generated part (excluding input) - original_length = return_data["orig_input_lengths"][idx].item() - seq_len = seq.size(0) - - # 
The generated content starts after the left-padded input - generated_part = seq[-(seq_len - input_length) :] - - eos_positions = (generated_part == self.tokenizer.eos_token_id).nonzero( - as_tuple=True - )[0] - # TODO @sahilj: handle different stopping criteria - # Calculate generation length - if len(eos_positions) > 0: - gen_length = ( - eos_positions[0].item() + 1 - ) # +1 to include the EOS token - else: - gen_length = len(generated_part) - - generation_lengths.append(gen_length) - - valid_length = original_length + gen_length - unpadded_sequence_lengths.append(valid_length) - - # Extract the original input tokens from the left-padded sequence - # For left-padded sequences, tokens are at the end of the input section - valid_input_part = ( - seq[input_length - original_length : input_length] - if original_length > 0 - else torch.tensor([], device=seq.device, dtype=seq.dtype) - ) - - # Combine with generated part - valid_generated_part = generated_part[:gen_length] - valid_tokens = torch.cat([valid_input_part, valid_generated_part]) - - # Place at the beginning of the right-padded sequence - right_padded_output_ids[idx, :valid_length] = valid_tokens - - # Store the right-padded outputs - return_data["output_ids"] = right_padded_output_ids - - # Align generation_logprobs with right-padded output format - batch_size = len(return_data["generation_logprobs"]) - right_padded_logprobs = torch.zeros( - (batch_size, max_seq_len), - dtype=return_data["generation_logprobs"][0].dtype, - device=return_data["generation_logprobs"][0].device, - ) - - for idx, logprob_seq in enumerate(return_data["generation_logprobs"]): - original_length = return_data["orig_input_lengths"][idx].item() - gen_length = generation_lengths[idx] - - # For right-padded format, we need: - # 1. Zeros for the original input tokens (at the beginning) - # 2. Actual logprobs for generated tokens (after the zeros) - # 3. 
Zeros padding at the end (if needed) - - right_padded_seq = torch.zeros( - max_seq_len, dtype=logprob_seq.dtype, device=logprob_seq.device - ) - right_padded_seq[original_length : original_length + gen_length] = ( - logprob_seq[:gen_length] - ) - right_padded_logprobs[idx] = right_padded_seq - valid_length = original_length + gen_length - - # Remove the temporary data we added - if "generation_logprobs" in return_data: - del return_data["generation_logprobs"] - if "orig_input_lengths" in return_data: - del return_data["orig_input_lengths"] - if "left_padded_output_ids" in return_data: - del return_data["left_padded_output_ids"] - - # Ensure consistent data types and device placement - return_data["output_ids"] = right_padded_output_ids - return_data["logprobs"] = right_padded_logprobs - return_data["generation_lengths"] = torch.tensor( - generation_lengths, dtype=torch.long - ) - return_data["unpadded_sequence_lengths"] = torch.tensor( - unpadded_sequence_lengths, dtype=torch.long - ) - - # Move everything to CPU before returning - return_data.to("cpu") - - return return_data - - def _add_noise_to_weights(self) -> None: - """Add small Gaussian noise to the weights of the model. Note that this is used for testing purposes only.""" - # TODO @sahilj: do this without a summon (maybe FSDP2) - noise_std = 0.01 # Standard deviation for the noise - with torch.distributed.fsdp.FullyShardedDataParallel.summon_full_params( - self.model, recurse=True - ): - for p in self.model.parameters(): - if p.requires_grad: - noise = torch.randn_like(p.data) * noise_std - p.data.add_(noise) # Add noise in-place - torch.cuda.synchronize() - - def report_device_id(self) -> str: - """Report the UUID of the current CUDA device using NVML. 
- - Returns: - str: UUID of the device in the format "GPU-xxxxx" - """ - from nemo_rl.utils.nvml import get_device_uuid - - # Get current device index from torch - device_idx = torch.cuda.current_device() - # Get device UUID using NVML - return get_device_uuid(device_idx) - - @torch.no_grad() - def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: - """Prepare the weights for IPC. - - This function: - - Prepares the state_dict of the model. - - Collects the info for streaming multiple tensors. - - Returns: - list: The list of parameters sizes. - float: The total available memory in bytes. - """ - from nemo_rl.utils.nvml import get_free_memory_bytes - - # If the model is not FSDP, then we need to manually move it to the GPU - # For an FSDP model, model.state_dict() will move the params to the GPU - if not isinstance(self.model, FullyShardedDataParallel): - self.model = self.manual_load_to_gpu(self.model) - self._held_sharded_state_dict_reference = self.model.state_dict() - else: - # Get sharded state dict instead of full state dict for FSDP1 - with FullyShardedDataParallel.state_dict_type( - self.model, - state_dict_type=StateDictType.SHARDED_STATE_DICT, - state_dict_config=ShardedStateDictConfig(), - ): - self._held_sharded_state_dict_reference = self.model.state_dict() - - # Collect info for streaming multiple tensors - state_dict_info = [] - for name, tensor in self._held_sharded_state_dict_reference.items(): - # dtensor's numel will return complete tensor instead of only local tensor - size_in_bytes = tensor.element_size() * tensor.numel() - state_dict_info.append((name, size_in_bytes)) - - # Collect current available memory for refit - ## Get current device index from torch - device_idx = torch.cuda.current_device() - ## Get device free memory using NVML - total_available_bytes = get_free_memory_bytes(device_idx) - ## Use 80% of the free memory for safety - total_available_bytes *= 0.8 - - return state_dict_info, total_available_bytes - - 
@torch.no_grad() - def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: - from torch.distributed.tensor import DTensor - from torch.multiprocessing.reductions import reduce_tensor - - assert self._held_sharded_state_dict_reference is not None, ( - "prepare_weights_for_ipc must be called before get_weights_ipc_handles" - ) - - # Clean up the held tensors to reduce peak memory - if self._held_streamed_param_reference is not None: - del self._held_streamed_param_reference - self._held_streamed_param_reference = None - - converted_params = {} - for key in keys: - # Get full_tensor for dtensor (GPU > 1) - tensor = self._held_sharded_state_dict_reference[key] - if isinstance(tensor, DTensor): - full_tensor = tensor.full_tensor() - else: - full_tensor = tensor - # Convert parameters to the configured dtype - converted_params[key] = full_tensor.to(self.dtype, non_blocking=True) - - # Temporary record the full tensor for cleanup - # It is needed for cleanup the last full_tensor in the refit process - self._held_streamed_param_reference = converted_params - - # Get device UUID for IPC - device_uuid = self.report_device_id() - # Create handles for the tensors - all_handles = [] - for key, p in converted_params.items(): - handle = reduce_tensor(p.detach()) - all_handles.append((key, handle)) - - # (pack_tensor_for_ipc: bool, handles: list) - serialized = (False, all_handles) - - return {device_uuid: serialized} - - def prepare_for_lp_inference(self) -> None: - self.model = self.manual_load_to_gpu(self.model) - self.model.eval() - self.offload_before_refit() - - def prepare_for_training(self, *args: Any, **kwargs: Any) -> None: - # onload models and optimizer state to cuda - self.model = self.manual_load_to_gpu(self.model) - self.model.train() - - if not self.cfg["fsdp_offload_enabled"]: - # Move optimizer state to CUDA if it exists - if hasattr(self, "optimizer") and self.optimizer is not None: - for state in self.optimizer.state.values(): - for k, v in 
state.items(): - if torch.is_tensor(v) and not v.is_cuda: - state[k] = v.to("cuda") - - torch.cuda.empty_cache() - - @torch.no_grad() - def offload_before_refit(self) -> None: - """Offload the optimizer and buffers to the CPU.""" - torch.randn(1).cuda() # wake up torch allocator - if not self.cfg["fsdp_offload_enabled"]: - if hasattr(self, "optimizer") and self.optimizer is not None: - for state in self.optimizer.state.values(): - for k, v in state.items(): - if torch.is_tensor(v): - state[k] = v.to("cpu") - - gc.collect() - torch.cuda.empty_cache() - - # Print memory stats after offloading - allocated = torch.cuda.memory_allocated() / (1024**3) # Convert to GB - reserved = torch.cuda.memory_reserved() / (1024**3) # Convert to GB - print( - f"GPU Memory after optimizer offload: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved" - ) - - @torch.no_grad() - def offload_after_refit(self) -> None: - # Offload as much as possible on the CPU - self.model = self.manual_offload_to_cpu(self.model) - self.model.eval() - torch.randn(1).cuda() # wake up torch allocator - self.offload_before_refit() # rerun the old offload function - - # Clean up the held tensors - if self._held_sharded_state_dict_reference is not None: - del self._held_sharded_state_dict_reference - self._held_sharded_state_dict_reference = None - if self._held_streamed_param_reference is not None: - del self._held_streamed_param_reference - self._held_streamed_param_reference = None - - gc.collect() - torch.cuda.empty_cache() - - allocated = torch.cuda.memory_allocated() / (1024**3) # Convert to GB - reserved = torch.cuda.memory_reserved() / (1024**3) # Convert to GB - print( - f"GPU Memory after refit complete: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved" - ) - - def manual_offload_to_cpu(self, model: torch.nn.Module) -> torch.nn.Module: - if self.cfg["fsdp_offload_enabled"]: - return model - - for param in model.parameters(): - param.data = param.data.to("cpu", non_blocking=True) - if 
hasattr(param, "_local_shard"): - param._local_shard = param.data - if param.grad is not None: - param.grad = param.grad.to("cpu", non_blocking=True) - for buffer in model.buffers(): - buffer.data = buffer.data.to("cpu", non_blocking=True) - - if hasattr(model, "_fsdp_wrapped_module"): - wrapped_module = model._fsdp_wrapped_module - assert isinstance(wrapped_module, torch.nn.Module), ( - f"wrapped_module is not a torch.nn.Module: instead, {type(wrapped_module)}" - ) - self.manual_offload_to_cpu(wrapped_module) - - return model - - def manual_load_to_gpu(self, model: torch.nn.Module) -> torch.nn.Module: - if self.cfg["fsdp_offload_enabled"]: - return model - - for param in model.parameters(): - param.data = param.data.to("cuda", non_blocking=True) - if hasattr(param, "_local_shard"): - param._local_shard = param.data - if param.grad is not None: - param.grad = param.grad.to("cuda", non_blocking=True) - for buffer in model.buffers(): - buffer.data = buffer.data.to("cuda", non_blocking=True) - - if hasattr(model, "_fsdp_wrapped_module"): - wrapped_module = model._fsdp_wrapped_module - assert isinstance(wrapped_module, torch.nn.Module), ( - f"wrapped_module is not a torch.nn.Module: instead, {type(wrapped_module)}" - ) - self.manual_load_to_gpu(wrapped_module) - - return model - - def save_checkpoint( - self, - weights_path: str, - optimizer_path: Optional[str] = None, - tokenizer_path: Optional[str] = None, - ) -> None: - """Save a checkpoint of the model. - - The checkpoint is saved in the following format: - - weights_path/ - __0_1.distcp - __1_0.distcp - ... - optimizer_path/ - __0_0.distcp - __1_0.distcp - ... - - the optimizer states are saved only if `optimizer` and `optimizer_path` are provided. 
- """ - save_checkpoint( - model=self.model, - weights_path=weights_path, - optimizer=self.optimizer if optimizer_path else None, - scheduler=self.scheduler if optimizer_path else None, - optimizer_path=optimizer_path, - tokenizer=self.tokenizer if tokenizer_path else None, - tokenizer_path=tokenizer_path, - ) - - def load_checkpoint( - self, weights_path: str, optimizer_path: Optional[str] = None - ) -> None: - """Load a checkpoint into the model.""" - load_checkpoint( - model=self.model, - weights_path=weights_path, - optimizer=self.optimizer if optimizer_path else None, - scheduler=self.scheduler if optimizer_path else None, - optimizer_path=optimizer_path, - ) - - def shutdown(self) -> None: - """Shutdown the policy.""" - pass - - def start_gpu_profiling(self) -> None: - """Start GPU profiling.""" - torch.cuda.profiler.start() - - def stop_gpu_profiling(self) -> None: - """Stop GPU profiling.""" - torch.cuda.profiler.stop() diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index e469b32d16..22bfcd690b 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -62,41 +62,29 @@ def __init__( if optimizer_path: optimizer_path = os.path.abspath(optimizer_path) - node_bundle_indices = None - self.cp_size = 1 + worker_builder_cls: str tp_size = 1 pp_size = 1 cp_size = 1 - worker_builder_cls: str - training_backend = None - if not config.get("megatron_cfg", {}).get( - "enabled", False - ): # Huggingface backend - if config["dtensor_cfg"]["enabled"]: - worker_builder_cls = ( - "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker" - ) - tp_size = config["dtensor_cfg"]["tensor_parallel_size"] - cp_size = config["dtensor_cfg"]["context_parallel_size"] - else: - worker_builder_cls = ( - "nemo_rl.models.policy.fsdp1_policy_worker.FSDP1PolicyWorker" - ) - training_backend = "hf" - elif config["megatron_cfg"]["enabled"]: # Megatron backend + megatron_enable = config.get("megatron_cfg", 
{}).get("enabled", False) + if megatron_enable: worker_builder_cls = ( "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker" ) tp_size = config["megatron_cfg"]["tensor_model_parallel_size"] pp_size = config["megatron_cfg"]["pipeline_model_parallel_size"] cp_size = config["megatron_cfg"]["context_parallel_size"] - training_backend = "megatron" else: - training_backend = "hf" + assert config["dtensor_cfg"]["enabled"], ( + "Please either set policy.megatron_cfg.enabled=true to use Megatron training backend " + "or set policy.dtensor_cfg.enabled=true to use DTensor training backend." + ) worker_builder_cls = ( - "nemo_rl.models.policy.fsdp1_policy_worker.FSDP1PolicyWorker" + "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker" ) + tp_size = config["dtensor_cfg"]["tensor_parallel_size"] + cp_size = config["dtensor_cfg"]["context_parallel_size"] self.sharding_annotations = NamedSharding( layout=np.arange(cluster.world_size()).reshape( @@ -135,9 +123,6 @@ def __init__( ) if config["dynamic_batching"]["enabled"]: - assert config["dtensor_cfg"]["enabled"] or training_backend == "megatron", ( - "Dynamic batch is only supported for DTensor or Megatron policy." 
- ) assert pp_size == 1, ( "Dynamic batching is only supported for single pipeline parallel stage" ) diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh deleted file mode 100755 index f5b29b7db7..0000000000 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) -source $SCRIPT_DIR/common.env - -# ===== BEGIN CONFIG ===== -NUM_NODES=4 -STEPS_PER_RUN=20 -MAX_STEPS=20 -NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up -NUM_MINUTES=30 -# ===== END CONFIG ===== - -exit_if_max_steps_reached - -# Run the experiment -cd $PROJECT_ROOT -uv run examples/run_dpo.py \ - --config $CONFIG_PATH \ - dpo.max_num_steps=$MAX_STEPS \ - logger.log_dir=$LOG_DIR \ - logger.wandb_enabled=True \ - logger.wandb.project=nemo-rl \ - logger.wandb.name=$EXP_NAME \ - logger.monitor_gpus=True \ - logger.tensorboard_enabled=True \ - checkpointing.enabled=True \ - checkpointing.checkpoint_dir=$CKPT_DIR \ - $@ \ - 2>&1 | tee $RUN_LOG - -# Convert tensorboard logs to json -uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS - -# Only run metrics if the target step is reached -if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then - uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 3.6' \ - 'data["train/loss"]["20"] < 3.4' \ - 'data["train/preference_loss"]["1"] > 0.69314' \ - 'data["train/preference_loss"]["1"] < 0.69316' \ - 'data["train/preference_loss"]["20"] < 0.6' -fi diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh deleted file mode 100755 index 49c96a6f58..0000000000 --- 
a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) -source $SCRIPT_DIR/common.env - -# ===== BEGIN CONFIG ===== -NUM_NODES=4 -STEPS_PER_RUN=30 -MAX_STEPS=30 -NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up -NUM_MINUTES=90 -# ===== END CONFIG ===== - -exit_if_max_steps_reached - -# Run the experiment -cd $PROJECT_ROOT -uv run examples/run_grpo_math.py \ - --config $CONFIG_PATH \ - grpo.max_num_steps=$MAX_STEPS \ - logger.log_dir=$LOG_DIR \ - logger.wandb_enabled=True \ - logger.wandb.project=nemo-rl \ - logger.wandb.name=$EXP_NAME \ - logger.monitor_gpus=True \ - logger.tensorboard_enabled=True \ - checkpointing.enabled=True \ - checkpointing.checkpoint_dir=$CKPT_DIR \ - $@ \ - 2>&1 | tee $RUN_LOG - -# Convert tensorboard logs to json -uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS - -# Only run metrics if the target step is reached -if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then - uv run tests/check_metrics.py $JSON_METRICS \ - 'mean(data["train/token_mult_prob_error"]) < 1.1' \ - 'data["train/token_mult_prob_error"]["30"] < 1.1' -fi - diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh b/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh deleted file mode 100755 index 5e10e4fff0..0000000000 --- a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) -source $SCRIPT_DIR/common.env - -# ===== BEGIN CONFIG ===== -NUM_NODES=1 -STEPS_PER_RUN=250 -MAX_STEPS=250 -NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up -NUM_MINUTES=30 -# ===== END CONFIG ===== - -exit_if_max_steps_reached - -# Run the 
experiment -cd $PROJECT_ROOT -uv run examples/run_sft.py \ - --config $CONFIG_PATH \ - sft.max_num_steps=$MAX_STEPS \ - logger.log_dir=$LOG_DIR \ - logger.wandb_enabled=True \ - logger.wandb.project=nemo-rl \ - logger.wandb.name=$EXP_NAME \ - logger.monitor_gpus=True \ - logger.tensorboard_enabled=True \ - checkpointing.enabled=True \ - checkpointing.checkpoint_dir=$CKPT_DIR \ - $@ \ - 2>&1 | tee $RUN_LOG - -# Convert tensorboard logs to json -uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS - -# Only run metrics if the target step is reached -if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then - # TODO: FIGURE OUT CORRECT METRICS - uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 4' \ - 'data["train/loss"]["250"] < 0.5' \ - 'max(data["ray/node.0.gpu.0.mem_gb"]) < 60' -fi diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt index d734b4511a..59503c10be 100644 --- a/tests/test_suites/nightly.txt +++ b/tests/test_suites/nightly.txt @@ -7,8 +7,7 @@ tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh -# FSDP1 vs Dtensor (Qwen/Qwen2.5-7B-Instruct) -tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp1.v3.sh +# Dtensor (Qwen/Qwen2.5-7B-Instruct) tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh # Functional 32b run @@ -21,9 +20,8 @@ tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh # 1N 1B/8B runs tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh -# Dtensor vs fsdp1 (8B) +# Dtensor (8B) tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh -tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp1.v2.sh # Functional 32b test tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh @@ -38,8 +36,7 @@ 
tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-megatron.sh # 1N dtensor tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh -# Short dtensor vs fsdp1 comparison -tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp1-quick.v2.sh +# Short dtensor tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh # Short megatron diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py index db41fe2d39..853b19145e 100644 --- a/tests/unit/experience/test_rollouts.py +++ b/tests/unit/experience/test_rollouts.py @@ -35,8 +35,6 @@ ) from nemo_rl.models.generation import configure_generation_config from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration -from nemo_rl.models.policy import PolicyConfig -from nemo_rl.models.policy.lm_policy import Policy # Import the test environment definitions from tests.unit.test_envs import ( @@ -183,43 +181,6 @@ def initial_multi_step_calculator_batch(rollout_tokenizer): return BatchedDataDict(initial_batch_dict) -# Keep the base config separate -base_hf_test_config: PolicyConfig = { - "policy_type": "hf", - "model_name": MODEL_NAME, - "tokenizer_name": None, - "model_path": None, - "num_workers": 1, - "train_global_batch_size": 2, - "train_micro_batch_size": 1, - "logprob_batch_size": 2, - "generation_batch_size": 1, # Smaller for simpler testing - "learning_rate": 5e-6, - "precision": "float32", - "activation_checkpointing_enabled": False, - "fsdp_offload_enabled": False, - "generation": { - "backend": "hf", - "max_new_tokens": 50, # Increased for tool call format - "temperature": 0.01, - "top_p": 1.0, - "top_k": None, - "stop_token_ids": None, - "stop_strings": None, - }, - "optimizer": { - "name": "torch.optim.AdamW", - "kwargs": { - "lr": 5e-6, - "weight_decay": 0.01, - "betas": [0.9, 0.999], - "eps": 1e-8, - }, - }, - "dtensor_cfg": {"enabled": False}, - "dynamic_batching": {"enabled": False}, -} - base_vllm_test_config: VllmConfig = { "backend": 
"vllm", "model_name": MODEL_NAME, @@ -253,47 +214,6 @@ def initial_multi_step_calculator_batch(rollout_tokenizer): } -@pytest.fixture(scope="function") -def multi_step_setup_hf( - rollout_cluster, - rollout_tokenizer, - multi_step_calculator_environment, - initial_multi_step_calculator_batch, -): - """Sets up components for multi-step calculator tests using Policy.""" - policy = None - task_to_env, _ = multi_step_calculator_environment - print("Creating Policy for Multi-Step Calculator Test...") - try: - config = deepcopy(base_hf_test_config) - config["tokenizer_name"] = rollout_tokenizer.name_or_path - if "gpt2" in rollout_tokenizer.name_or_path.lower(): - config["model_name"] = "gpt2" - config["generation"] = configure_generation_config( - config["generation"], rollout_tokenizer - ) - config["generation"]["stop_strings"] = None - policy = Policy( - cluster=rollout_cluster, - config=config, - tokenizer=rollout_tokenizer, - init_reference_model=False, - init_optimizer=False, - ) - yield ( - policy, - rollout_tokenizer, - task_to_env, - initial_multi_step_calculator_batch, - rollout_cluster, - ) - finally: - print("Cleaning up Policy (Multi-Step Calc Test)...") - if policy: - policy.shutdown() - print("Policy cleanup finished (Multi-Step Calc Test).") - - @pytest.fixture(scope="function") def multi_step_setup_vllm_sync( rollout_cluster, @@ -377,71 +297,6 @@ def multi_step_setup_vllm_async( print("VllmGeneration cleanup finished (async engine, Multi-Step Calc Test).") -def test_run_multi_step_calculator_hf(multi_step_setup_hf): - """Tests multi-step calculator rollout with Policy.""" - policy, rollout_tokenizer, task_to_env, initial_batch, rollout_cluster = ( - multi_step_setup_hf - ) - max_rollout_turns = ( - initial_batch["extra_env_info"][0]["max_steps"] + 1 - ) # Allow max steps + final answer - max_seq_len = 1024 # Increased for potentially longer interaction - - print("\nRunning multi-step calculator rollout (HF)...") - policy.prepare_for_generation() - 
final_batch, rollout_metrics = run_multi_turn_rollout( - policy_generation=policy, - input_batch=initial_batch, - tokenizer=rollout_tokenizer, - task_to_env=task_to_env, - max_seq_len=max_seq_len, - max_rollout_turns=max_rollout_turns, - ) - policy.finish_generation() - print("Multi-step calculator rollout complete (HF).") - - # --- Assertions --- - assert isinstance(final_batch, BatchedDataDict) - assert "message_log" in final_batch - assert "total_reward" in final_batch - assert len(final_batch["message_log"]) == len(initial_batch["message_log"]) - - sample_log = final_batch["message_log"][0] - expected_final_answer = initial_batch["extra_env_info"][0]["expected_final_answer"] - print("\nSample Interaction Log (Multi-Step Calculator - HF):") - tool_call_count = 0 - final_answer_msg = None - for i, msg in enumerate(sample_log): - print(f" {i}: Role={msg['role']}, Content='{msg['content']}'") - if msg["role"] == "assistant": - if msg["content"].strip().endswith(""): - tool_call_count += 1 - else: - final_answer_msg = msg["content"].strip() - - assert tool_call_count >= 1, "Expected at least one tool call" - assert final_answer_msg is not None, ( - "Expected a final answer message from assistant" - ) - - # Check final answer correctness (allowing for different final answer formats) - final_answer_logic = _MultiStepCalculatorLogic() - extracted_final_answer = final_answer_logic._is_final_answer(final_answer_msg) - assert extracted_final_answer is not None, ( - f"Could not parse final answer from: {final_answer_msg}" - ) - assert abs(extracted_final_answer - expected_final_answer) < 1e-6, ( - f"Final answer incorrect. 
Expected {expected_final_answer}, Got {extracted_final_answer}" - ) - - # Check total reward (should be 1.0 if correct) - assert torch.all(final_batch["total_reward"] == 1.0), ( - f"Expected total reward 1.0, got {final_batch['total_reward']}" - ) - - print("\nMulti-Step Calculator HF Test assertions passed.") - - @pytest.mark.skipif( not torch.cuda.is_available() or torch.cuda.device_count() < 1, reason="VLLM test requires at least 1 GPU", diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 8f37e1e23d..9c3ddc3435 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -68,52 +68,49 @@ "vllm_kwargs": {}, } - -def get_basic_hf_test_config(enable_dtensor: bool = False) -> PolicyConfig: - # Create HF-specific config with required parameters - return { - "model_name": basic_vllm_test_config["model_name"], - "tokenizer": { - "name": basic_vllm_test_config["tokenizer"]["name"], - }, - # Required training parameters - "train_global_batch_size": 1, - "train_micro_batch_size": 1, - "learning_rate": 5e-6, - "logprob_batch_size": 1, - "max_new_tokens": 16, - "do_sample": False, - "precision": "float32", - "fsdp_offload_enabled": False, - "activation_checkpointing_enabled": False, - "optimizer": { - "name": "torch.optim.AdamW", - "kwargs": { - "lr": 5e-6, - "weight_decay": 0.01, - "betas": [0.9, 0.999], - "eps": 1e-8, - }, - }, - "dtensor_cfg": { - "enabled": enable_dtensor, - "cpu_offload": False, - "sequence_parallel": False, - "activation_checkpointing": False, - "tensor_parallel_size": 1, - "context_parallel_size": 1, - "custom_parallel_plan": None, - }, - "dynamic_batching": { - "enabled": enable_dtensor, # Dynamic batching is only supported with DTensor - "train_mb_tokens": 40, - "logprob_mb_tokens": 40, - "sequence_length_round": 4, +basic_dtensor_test_config: PolicyConfig = { + "model_name": 
basic_vllm_test_config["model_name"], + "tokenizer": { + "name": basic_vllm_test_config["tokenizer"]["name"], + }, + # Required training parameters + "train_global_batch_size": 1, + "train_micro_batch_size": 1, + "learning_rate": 5e-6, + "logprob_batch_size": 1, + "max_new_tokens": 16, + "do_sample": False, + "precision": "float32", + "fsdp_offload_enabled": False, + "activation_checkpointing_enabled": False, + "optimizer": { + "name": "torch.optim.AdamW", + "kwargs": { + "lr": 5e-6, + "weight_decay": 0.01, + "betas": [0.9, 0.999], + "eps": 1e-8, }, - "max_grad_norm": 1.0, - "make_sequence_length_divisible_by": 1, - "generation": deepcopy(basic_vllm_test_config), - } + }, + "dtensor_cfg": { + "enabled": True, + "cpu_offload": False, + "sequence_parallel": False, + "activation_checkpointing": False, + "tensor_parallel_size": 1, + "context_parallel_size": 1, + "custom_parallel_plan": None, + }, + "dynamic_batching": { + "enabled": True, + "train_mb_tokens": 40, + "logprob_mb_tokens": 40, + "sequence_length_round": 4, + }, + "max_grad_norm": 1.0, + "make_sequence_length_divisible_by": 1, + "generation": deepcopy(basic_vllm_test_config), +} def get_basic_megatron_test_config( @@ -420,14 +417,14 @@ async def test_vllm_policy_generation_async( vllm_config["vllm_cfg"]["async_engine"] = True vllm_config["vllm_cfg"]["tensor_parallel_size"] = tensor_parallel_size vllm_config["vllm_cfg"]["pipeline_parallel_size"] = pipeline_parallel_size - hf_config = get_basic_hf_test_config(enable_dtensor=True) + dtensor_config = basic_dtensor_test_config from nemo_rl.models.policy.lm_policy import Policy async_policy = VllmGeneration(cluster, vllm_config) async_policy.finish_generation() print("creating hf policy...") - lm_policy = Policy(cluster, hf_config, tokenizer) + lm_policy = Policy(cluster, dtensor_config, tokenizer) refit_policy_generation( lm_policy, async_policy, vllm_config["colocated"]["enabled"] ) @@ -522,8 +519,8 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): 
from nemo_rl.models.policy.lm_policy import Policy - hf_config = get_basic_hf_test_config(enable_dtensor=False) - lm_policy = Policy(cluster, hf_config, tokenizer) + dtensor_config = basic_dtensor_test_config + lm_policy = Policy(cluster, dtensor_config, tokenizer) print("refitting vllm policy...") refit_policy_generation(lm_policy, policy, vllm_config["colocated"]["enabled"]) @@ -620,10 +617,7 @@ def configure_worker_fixed_seed(num_gpus, bundle_indices=None): @pytest.mark.timeout(140) @pytest.mark.asyncio @pytest.mark.parametrize("async_engine", [True, False]) -@pytest.mark.parametrize("enable_dtensor", [True, False]) -async def test_vllm_generation_with_hf_training( - cluster, tokenizer, enable_dtensor, async_engine -): +async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine): """1. Use vLLM for generation 2. Use HF policy for training and logprob computation @@ -637,8 +631,8 @@ async def test_vllm_generation_with_hf_training( vllm_config["vllm_cfg"]["async_engine"] = async_engine vllm_config = configure_generation_config(vllm_config, tokenizer) - hf_config = get_basic_hf_test_config(enable_dtensor=enable_dtensor) - hf_config["train_global_batch_size"] = 4 + dtensor_config = deepcopy(basic_dtensor_test_config) + dtensor_config["train_global_batch_size"] = 4 vllm_policy = None lm_policy = None @@ -679,8 +673,8 @@ async def test_vllm_generation_with_hf_training( vllm_policy = VllmGeneration(cluster, vllm_config) vllm_policy.finish_generation() - print("Creating HF policy...") - lm_policy = Policy(cluster, hf_config, tokenizer) + print("Creating DTensor policy...") + lm_policy = Policy(cluster, dtensor_config, tokenizer) print("refitting vllm policy...") refit_policy_generation( @@ -922,14 +916,14 @@ def test_vllm_weight_update_and_prefix_cache_reset( if tensor_parallel_size > 1: vllm_config["vllm_kwargs"] = {"distributed_executor_backend": "ray"} - hf_config = get_basic_hf_test_config(enable_dtensor=enable_dtensor) + dtensor_config = 
basic_dtensor_test_config # Create policies vllm_policy = None lm_policy = None try: - print(f"Creating HF policy for TP={tensor_parallel_size}...") - lm_policy = Policy(cluster, hf_config, tokenizer) + print(f"Creating DTensor policy for TP={tensor_parallel_size}...") + lm_policy = Policy(cluster, dtensor_config, tokenizer) print(f"Creating vLLM policy for TP={tensor_parallel_size}...") vllm_policy = VllmGeneration(cluster, vllm_config) @@ -1031,9 +1025,9 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): vllm_policy = VllmGeneration(cluster, vllm_config) vllm_policy.finish_generation() - print("Creating HF policy...") - hf_config = get_basic_hf_test_config(enable_dtensor=enable_dtensor) - lm_policy = Policy(cluster, hf_config, tokenizer) + print("Creating DTensor policy...") + dtensor_config = basic_dtensor_test_config + lm_policy = Policy(cluster, dtensor_config, tokenizer) print("refitting vllm policy...") # take it outside statistics to get clean peak memory during refit @@ -1108,9 +1102,9 @@ def test_vllm_generation_with_stop( # set to sleep first if not in eval mode vllm_generation.finish_generation() - print("Creating HF policy...") - hf_config = get_basic_hf_test_config(enable_dtensor=enable_dtensor) - lm_policy = Policy(cluster, hf_config, tokenizer) + print("Creating DTensor policy...") + dtensor_config = basic_dtensor_test_config + lm_policy = Policy(cluster, dtensor_config, tokenizer) print("refitting vllm policy...") refit_policy_generation( @@ -1201,9 +1195,9 @@ async def test_vllm_refit_non_collocated_update_weights( ) # Create Policy on its own cluster - hf_config = get_basic_hf_test_config(enable_dtensor=True) - hf_config["generation"]["colocated"]["enabled"] = False - lm_policy = Policy(policy_cluster_separate, hf_config, tokenizer) + dtensor_config = deepcopy(basic_dtensor_test_config) + dtensor_config["generation"]["colocated"]["enabled"] = False + lm_policy = Policy(policy_cluster_separate, dtensor_config, tokenizer) # 
Create VllmGeneration policy on its own cluster vllm_config = deepcopy(basic_vllm_test_config) diff --git a/tests/unit/models/policy/test_fsdp1_worker.py b/tests/unit/models/policy/test_fsdp1_worker.py deleted file mode 100644 index ab0fbb647c..0000000000 --- a/tests/unit/models/policy/test_fsdp1_worker.py +++ /dev/null @@ -1,879 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import pprint -from copy import deepcopy - -import pytest -import ray -import torch - -from nemo_rl.algorithms.interfaces import LossFunction -from nemo_rl.algorithms.loss_functions import ClippedPGLossFn, NLLLoss -from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.virtual_cluster import RayVirtualCluster -from nemo_rl.models.generation import configure_generation_config -from nemo_rl.models.policy import PolicyConfig -from nemo_rl.models.policy.lm_policy import Policy -from tests.unit.test_utils import SimpleLoss, SimpleNLLLoss - -basic_llama_test_config: PolicyConfig = { - "model_name": "Qwen/Qwen3-0.6B", - "tokenizer": { - "name": "Qwen/Qwen3-0.6B", - }, - "generation_batch_size": 1, # Small batch size for testing - "train_global_batch_size": 4, - "train_micro_batch_size": 1, - "learning_rate": 5e-6, - "logprob_batch_size": 1, - "precision": "float32", - "fsdp_offload_enabled": False, - 
"activation_checkpointing_enabled": False, - "generation": { - "backend": "hf", - "temperature": 1.0, - "max_new_tokens": 16, # Small number of tokens for testing - "top_p": 1.0, - "top_k": None, - "stop_token_ids": None, - "stop_strings": None, - }, - "dtensor_cfg": { - "enabled": False, - "cpu_offload": False, - "sequence_parallel": False, - "activation_checkpointing": False, - "tensor_parallel_size": 1, - "context_parallel_size": 1, - "custom_parallel_plan": None, - }, - "dynamic_batching": { - "enabled": False, - }, - "optimizer": { - "name": "torch.optim.AdamW", - "kwargs": { - "lr": 5e-6, - "weight_decay": 0.01, - "betas": [0.9, 0.999], - "eps": 1e-8, - }, - }, - "scheduler": { - "name": "torch.optim.lr_scheduler.CosineAnnealingLR", - "kwargs": { - "T_max": 100, - }, - }, - "max_grad_norm": 1.0, -} - - -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check_for_all(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - - yield - - # Restore the original value - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - -@pytest.fixture(scope="function") -def gc_collect(): - """Helper function to force garbage collection after a test""" - import gc - - gc.collect() - - -@pytest.fixture(scope="function") -def tokenizer(): - """Initialize tokenizer for the test model.""" - tokenizer = get_tokenizer(basic_llama_test_config["tokenizer"]) - return tokenizer - - -@pytest.fixture(scope="function") -def test_input_data(tokenizer): - """Create test input data for inference.""" - prompts = [ - "Write a story about a magical forest", - "Explain how photosynthesis works", - "What are the benefits of exercise?", - "Describe the water cycle", - "What is the capital of France?", - "Who is the president of the USA?", - "What is the capital of the moon?", - "Where is the sun?", - ] - - expected_generations = [ - "Write a story about a magical forest where the trees are made of stars and 
the ground is made of light. The", - "Explain how photosynthesis works in the context of the environment and the role of the sun in it.\nAnswer", - "What are the benefits of exercise? What are the risks of exercise? What are the benefits and risks of physical activity", - "Describe the water cycle and its importance in the environment.\nAnswer:\nThe **water cycle** is a", - "What is the capital of France? The capital of France is Paris. The answer is Paris. The answer is Paris", - "Who is the president of the USA? The answer is the president of the United States of America, which is the president", - "What is the capital of the moon? The answer is...? Let me think. I know that the moon is a", - "Where is the sun? Where is the moon? Where is the earth? Where is the sun in the", - ] - - # Tokenize the prompts - tokenized = tokenizer( - prompts, - padding=True, - truncation=True, - max_length=64, - return_tensors="pt", - padding_side="right", - ) - - # Calculate input lengths from attention mask - input_lengths = tokenized["attention_mask"].sum(dim=1).to(torch.int32) - - data = BatchedDataDict( - { - "input_ids": tokenized["input_ids"], - "input_lengths": input_lengths, - } - ) - - return data, prompts, expected_generations - - -@pytest.fixture -def policy_setup(tokenizer, num_gpus): - """Setup and teardown for policy tests - creates a virtual cluster and policy.""" - policy = None - cluster = None - - cluster_name = f"test-init-{num_gpus}gpu" - print(f"Creating virtual cluster '{cluster_name}' for {num_gpus} GPUs...") - - cluster = RayVirtualCluster( - name=cluster_name, - bundle_ct_per_node_list=[num_gpus], - use_gpus=True, - num_gpus_per_node=num_gpus, - max_colocated_worker_groups=1, - ) - - config = basic_llama_test_config - config["generation"] = configure_generation_config(config["generation"], tokenizer) - - print("Creating Policy...") - policy = Policy(cluster=cluster, config=config, tokenizer=tokenizer) - - yield policy, cluster - - # Clean up after the test 
- print("Cleaning up resources for test") - cluster.shutdown() - policy.worker_group.shutdown() - - -@pytest.mark.timeout(180) -@pytest.mark.parametrize("num_gpus", [1, 2], ids=["1gpu", "2gpu"]) -def test_lm_policy_init(policy_setup, num_gpus): - policy, cluster = policy_setup - - # Verify cluster and policy were properly created - assert policy is not None, "Policy was not created properly" - assert cluster is not None, "Cluster was not created properly" - - # Verify we have workers matching the GPU count - assert len(policy.worker_group.workers) == num_gpus, ( - f"Should have {num_gpus} worker(s), one per GPU" - ) - - # Check workers are alive - worker_alive = ray.get([w.is_alive.remote() for w in policy.worker_group.workers]) - assert all(worker_alive), f"Not all workers are alive: {worker_alive}" - - # Get GPU info from both workers to verify GPU usage - print("\nGetting GPU information from workers...") - gpu_infos = ray.get([w.get_gpu_info.remote() for w in policy.worker_group.workers]) - print("\nGPU Information:") - for i, info in enumerate(gpu_infos): - print(f"\nWorker {i} GPU Info:") - pprint.pprint(info) - - # Check 1: Verify workers have different ranks - gpu_ranks = [info["rank"] for info in gpu_infos] - assert len(set(gpu_ranks)) == num_gpus, ( - f"Expected {num_gpus} different ranks, got {gpu_ranks}" - ) - assert set(gpu_ranks) == set(range(num_gpus)), ( - f"Expected ranks {set(range(num_gpus))}, got {gpu_ranks}" - ) - - # Check 2: Verify workers have different local_ranks - local_ranks = [info["local_rank"] for info in gpu_infos] - assert len(set(local_ranks)) == num_gpus, ( - f"Expected {num_gpus} different local_ranks, got {local_ranks}" - ) - assert set(local_ranks) == set(range(num_gpus)), ( - f"Expected local_ranks {set(range(num_gpus))}, got {local_ranks}" - ) - - # Check 3: Verify workers have different CUDA_VISIBLE_DEVICES - cuda_visible_devices = [ - info["env_vars"].get("CUDA_VISIBLE_DEVICES") for info in gpu_infos - ] - if num_gpus > 1: 
- assert len(set(cuda_visible_devices)) == num_gpus, ( - f"Expected different CUDA_VISIBLE_DEVICES, got {cuda_visible_devices}" - ) - else: - assert len(set(cuda_visible_devices)) == 1, ( - f"Expected one CUDA_VISIBLE_DEVICES for 1 GPU, got {cuda_visible_devices}" - ) - - # Check 4: Verify all workers report correct world_size - for info in gpu_infos: - assert info["world_size"] == num_gpus, ( - f"Expected world_size={num_gpus}, got {info['world_size']}" - ) - assert info["env_vars"]["WORLD_SIZE"] == str(num_gpus), ( - f"Expected WORLD_SIZE={num_gpus}, got {info['env_vars']['WORLD_SIZE']}" - ) - - # Check 5: Verify significant GPU memory is allocated (at least 1GB) on all GPUs - for info in gpu_infos: - assert info["memory_allocated_mb"] > 1000, ( - f"Not enough memory allocated on GPU for rank {info['rank']}: {info['memory_allocated_mb']:.2f} MB" - ) - - # Check 6: Verify model parameters are on CUDA devices for all workers - for info in gpu_infos: - param_sample = list(info["parameter_sample"].values())[0] - assert "cuda" in param_sample["device"], ( - f"Parameter not on CUDA device: {param_sample['device']}" - ) - - # Check 8: Verify same model parameters are being tracked across workers - param_names = [list(info["parameter_sample"].keys())[0] for info in gpu_infos] - assert len(set(param_names)) == 1, ( - f"Workers are not tracking the same parameter: {param_names}" - ) - - # Check 9: Both workers should see their device as cuda:0 (correct distributed behavior) - for info in gpu_infos: - param_device = list(info["parameter_sample"].values())[0]["device"] - assert param_device == "cuda:0", ( - f"Expected parameter device to be cuda:0, got {param_device}" - ) - - -@pytest.fixture -def training_setup(tokenizer, request, num_gpus): - """ - Setup and teardown specifically for training tests. - - When used without parameterization, uses the default config. 
- When parameterized, takes any config updates as a dictionary in request.param - and applies them to the basic config. - """ - policy = None - cluster = None - data = None - loss_fn = None - - # Get config updates from request.param if available - config_updates = {} - config_suffix = "" - if hasattr(request, "param") and request.param is not None: - config_updates = request.param - config_suffix = "-" + "-".join([f"{k}={v}" for k, v in config_updates.items()]) - - try: - # Create resources with unique name - cluster_name = f"test-train-{num_gpus}gpu{config_suffix}" - print( - f"Creating training virtual cluster '{cluster_name}' for {num_gpus} GPUs" - f"{' with config updates: ' + str(config_updates) if config_updates else ''}" - ) - - cluster = RayVirtualCluster( - name=cluster_name, - bundle_ct_per_node_list=[num_gpus], - use_gpus=True, - num_gpus_per_node=num_gpus, - max_colocated_worker_groups=1, - ) - - # Create a config with optional modifications - config = deepcopy(basic_llama_test_config) - if config_updates: - config.update(config_updates) - - print("Creating training Policy...") - policy = Policy( - cluster=cluster, - config=config, - init_reference_model=False, - tokenizer=tokenizer, - ) - - # Create a test batch - print("Creating test batch...") - # set random seed - torch.manual_seed(42) - - # Create test input_ids and attention_mask - input_ids = torch.randint(0, 32000, (8, 128)) # 8 sequences, each of length 128 - attention_mask = torch.ones(8, 128) - - # Calculate input_lengths (all sequences are full length in this test) - input_lengths = attention_mask.sum(dim=1).to(torch.int32) - - data = BatchedDataDict( - { - "input_ids": input_ids, - "input_lengths": input_lengths, - "attention_mask": attention_mask, # Keep for compatibility with loss functions - "labels": torch.randint(0, 32000, (8, 128)), - "sample_mask": torch.ones(8), - } - ) - - # Create loss function - loss_fn: LossFunction = SimpleLoss() - - # Provide the resources to the test - yield 
policy, cluster, data, loss_fn - - except Exception as e: - print(f"Error during training setup: {e}") - pytest.skip(f"Training setup failed: {e}") - finally: - # Clean up after the test - print("Cleaning up resources for test") - policy.worker_group.shutdown() - cluster.shutdown() - - -def get_max_gpu_utilization(policy): - max_memory_allocated = 0 - max_memory_reserved = 0 - gpu_infos = ray.get([w.get_gpu_info.remote() for w in policy.worker_group.workers]) - for info in gpu_infos: - max_memory_allocated = max(max_memory_allocated, info["memory_allocated_mb"]) - max_memory_reserved = max(max_memory_reserved, info["memory_reserved_mb"]) - return max_memory_allocated, max_memory_reserved - - -@pytest.mark.timeout(180) -@pytest.mark.parametrize( - "num_gpus, training_setup, config_name", - [ - (1, None, "default"), - (2, None, "default"), - (2, {"fsdp_offload_enabled": True}, "fsdp_offload"), - (2, {"activation_checkpointing_enabled": True}, "activation_checkpointing"), - ], - indirect=["training_setup"], - ids=[ - "1gpu_default", - "2gpu_default", - "2gpu_fsdp_offload", - "2gpu_activation_checkpointing", - ], -) -def test_lm_policy_training(training_setup, tracker, num_gpus, config_name): - def verify_loss_tensor(loss_tensor): - assert not torch.isnan(loss_tensor).any(), "Loss should not be NaN" - assert not torch.isinf(loss_tensor).any(), "Loss should not be Inf" - return loss_tensor - - policy, cluster, data, loss_fn = training_setup - - # Verify resources were created properly - assert policy is not None, "Training policy was not created properly" - assert cluster is not None, "Training cluster was not created properly" - assert data is not None, "Test data was not created properly" - assert loss_fn is not None, "Loss function was not created properly" - - # Call prepare_for_training if available - print( - f"\nPreparing for training with {num_gpus} GPU(s) and {config_name} config..." 
- ) - policy.prepare_for_training() - - losses = [] - for steps in range(4): - results = policy.train(data, loss_fn) - - # Verify results - assert "loss" in results, "Training results should contain 'loss'" - loss_tensor = results["loss"] - verify_loss_tensor(loss_tensor) - losses.append(loss_tensor[-1].item()) - - print( - f"Training loss with {num_gpus} GPU(s) and {config_name} config: {results['loss']}" - ) - - policy.finish_training() - assert losses[0] > losses[-1], "Loss should decrease over training iterations" - - after_training_mem_allocated, after_training_mem_reserved = get_max_gpu_utilization( - policy - ) - print( - f"Max GPU Utilization after training with {num_gpus} GPU(s) and {config_name} config: {after_training_mem_allocated:,.1f} MB allocated, " - f"{after_training_mem_reserved:,.1f} MB reserved" - ) - tracker.track( - f"{num_gpus}gpu_{config_name}_after_training_mem_allocated", - after_training_mem_allocated, - ) - tracker.track( - f"{num_gpus}gpu_{config_name}_after_training_mem_reserved", - after_training_mem_reserved, - ) - - policy.offload_after_refit() - after_offload_mem_allocated, after_offload_mem_reserved = get_max_gpu_utilization( - policy - ) - print( - f"Max GPU Utilization after offload with {num_gpus} GPU(s) and {config_name} config: {after_offload_mem_allocated:,.1f} MB allocated, " - f"{after_offload_mem_reserved:,.1f} MB reserved" - ) - tracker.track( - f"{num_gpus}gpu_{config_name}_after_offload_mem_allocated", - after_offload_mem_allocated, - ) - tracker.track( - f"{num_gpus}gpu_{config_name}_after_offload_mem_reserved", - after_offload_mem_reserved, - ) - - # Compare memory after offload to memory after training - if config_name == "fsdp_offload": - # With FSDP offload, memory usage after training should already be low - assert after_training_mem_allocated < 1_200, ( - "FSDP offload after training should be less than 1.2GB)" - ) - else: - assert after_training_mem_allocated > 5_000, ( - f"Memory after training with 
{config_name} config should be more than 5GB" - ) - - assert after_offload_mem_allocated < 1_200, ( - "Memory after offload should be less than 1.2GB" - ) - - -@pytest.fixture -def generation_setup(request, test_input_data, tokenizer, num_gpus): - """Setup and teardown specifically for generation tests.""" - policy = None - cluster = None - data = None - init_reference_model = request.param - - try: - # Create resources with unique name - cluster_name = f"test-gen-{num_gpus}gpu-ref{init_reference_model}" - print( - f"Creating generation virtual cluster '{cluster_name}' for {num_gpus} GPUs " - f"(ref_model={init_reference_model})..." - ) - - cluster = RayVirtualCluster( - name=cluster_name, - bundle_ct_per_node_list=[num_gpus], - use_gpus=True, - num_gpus_per_node=num_gpus, - max_colocated_worker_groups=1, - ) - - config = basic_llama_test_config - config["generation"] = configure_generation_config( - config["generation"], tokenizer - ) - - print("Creating generation Policy...") - policy = Policy( - cluster=cluster, - config=config, - tokenizer=tokenizer, - init_reference_model=request.param, - ) - - # Create a test batch - print("Creating test batch...") - torch.manual_seed(42) # For reproducibility - - # Prepare test data - data, prompts, expected_generations = test_input_data - - # Provide the resources to the test - yield policy, cluster, data, prompts, expected_generations - - except Exception as e: - print(f"Error during generation setup: {e}") - pytest.skip(f"Generation setup failed: {e}") - finally: - # Clean up after the test - print("Cleaning up resources for test") - policy.worker_group.shutdown() - cluster.shutdown() - - -@pytest.mark.timeout(180) -@pytest.mark.parametrize("num_gpus", [1, 2], ids=["1gpu", "2gpu"]) -@pytest.mark.parametrize("generation_setup", [False], indirect=True) -def test_lm_policy_generation(generation_setup, tokenizer, num_gpus, tracker): - policy, cluster, data, prompts, expected_generations = generation_setup - - # Verify 
resources were created properly - assert policy is not None, "Generation policy was not created properly" - assert cluster is not None, "Generation cluster was not created properly" - assert data is not None, "Test data was not created properly" - - # Call prepare_for_generation if available - print("Preparing for generation...") - policy.prepare_for_generation() - - # Generate text - print("Generating text...") - results = policy.generate(data, greedy=True) - - # Verify results - assert "output_ids" in results, "Generation results should contain 'output_ids'" - output_ids = results["output_ids"] - - # run logprob calculation manually to verify - fprop_logprob_data = BatchedDataDict( - { - "input_ids": results.get("output_ids"), - "input_lengths": results.get("unpadded_sequence_lengths"), - } - ) - fprop_results = policy.get_logprobs(fprop_logprob_data) - for i, length in enumerate(data["input_lengths"]): - fprop_results["logprobs"][i, :length] = 0 - - for i, valid_seq_len in enumerate(results["unpadded_sequence_lengths"]): - fprop_results["logprobs"][i, valid_seq_len:] = 0 - - # Basic validation of output shape and content - assert isinstance(output_ids, torch.Tensor), "Output should be a tensor" - assert output_ids.dim() == 2, ( - "Output should be 2-dimensional [batch_size, seq_length]" - ) - assert output_ids.size(0) == data.get("input_ids").size(0), ( - "Output batch size should match input" - ) - assert output_ids.size(1) > data.get("input_ids").size(1), ( - "Output should be longer than input" - ) - - # validate that the logprobs are correct - avg_prob_mult_error = torch.mean( - torch.exp(torch.abs(results["logprobs"] - fprop_results["logprobs"])) - ) - print(f"avg prob mult error: {avg_prob_mult_error}") - tracker.track(f"avg_prob_mult_error_{num_gpus}gpu", float(avg_prob_mult_error)) - assert avg_prob_mult_error <= 1.025 - - # get logprobs for the expected generations - expected_tokenized = tokenizer( - expected_generations, - padding=True, - 
truncation=True, - max_length=64, - return_tensors="pt", - padding_side="right", - ) - - # Calculate input_lengths for expected generations - expected_lengths = expected_tokenized["attention_mask"].sum(dim=1).to(torch.int32) - - expected_data = BatchedDataDict( - { - "input_ids": expected_tokenized["input_ids"], - "input_lengths": expected_lengths, - } - ) - - expected_logprobs = policy.get_logprobs(expected_data)["logprobs"] - mean_lps = torch.mean(expected_logprobs * expected_tokenized["attention_mask"]) - tracker.track(f"mean_lps_{num_gpus}gpu", float(mean_lps)) - assert mean_lps > -1.7, "Expected logprobs should be greater than -1.7" - assert mean_lps < -1.4, "Expected logprobs should be less than -1.4" - - # Call finish_generation if available - print("Finishing generation...") - policy.finish_generation() - - -@pytest.mark.timeout(180) -@pytest.mark.parametrize("num_gpus", [1, 2], ids=["1gpu", "2gpu"]) -@pytest.mark.parametrize("generation_setup", [True], indirect=True) -def test_all_lm_policy_generation_lps_ref_training(generation_setup): - policy, cluster, data, prompts, expected_generations = generation_setup - - # Verify resources were created properly - assert policy is not None, "Generation policy was not created properly" - assert cluster is not None, "Generation cluster was not created properly" - assert data is not None, "Test data was not created properly" - - # Create reference data by generating with the model - print("creating some data") - ref_results = policy.generate(data, greedy=True) - - # Create training data with reference outputs - token_loss_mask = torch.ones_like(ref_results["output_ids"]) - token_loss_mask[:, : data.get("input_ids").size(1)] = 0 - - for idx, length in enumerate(ref_results["unpadded_sequence_lengths"]): - token_loss_mask[idx, length:] = 0 - - train_data = BatchedDataDict( - { - "input_ids": ref_results["output_ids"], - "input_lengths": ref_results["unpadded_sequence_lengths"], - "token_loss_mask": token_loss_mask, - 
"sample_mask": torch.ones(data.get("input_ids").size(0)), - } - ) - - fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] - - loss_fn: LossFunction = SimpleNLLLoss() - - # Train for a few steps - policy.prepare_for_training() - losses = [] - for step in range(8): - results = policy.train(train_data, loss_fn) - - # Verify results - assert "loss" in results, "Training results should contain 'loss'" - loss_tensor = results["loss"] - assert not torch.isnan(loss_tensor).any(), "Loss should not be NaN" - assert not torch.isinf(loss_tensor).any(), "Loss should not be Inf" - losses.append(loss_tensor[-1].item()) - - print(f"Training loss at step {step}: {results['loss']}") - - policy.finish_training() - - post_train_reference_logprobs = policy.get_reference_policy_logprobs(train_data)[ - "reference_logprobs" - ] - post_train_fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] - - # Verify that the reference policy logprobs match the original policy logprobs - assert torch.allclose(fprop_logprobs, post_train_reference_logprobs), ( - "Logprobs from policy before training and reference policy after training should match" - ) - - # Calculate NLL before and after training - pre_train_nll = -torch.sum(fprop_logprobs * token_loss_mask, dim=-1) - post_train_nll = -torch.sum(post_train_fprop_logprobs * token_loss_mask, dim=-1) - print(f"Pre-training NLL: {pre_train_nll.mean().item()}") - print(f"Post-training NLL: {post_train_nll.mean().item()}") - - # Verify that training improved the model's predictions on every sample - assert torch.all(post_train_nll < pre_train_nll), ( - "Model should improve at predicting its own generations after training" - ) - assert torch.all(post_train_nll < 5), ( - "Model should improve at predicting its own generations after training" - ) - - # Verify loss decreased during training - assert losses[0] > losses[-1], "Loss should decrease over training iterations" - - -def test_lm_policy_generation_with_stop(test_input_data, tokenizer): 
- # Create resources with unique name - cluster_name = "test-generate-with-stop" - print(f"Creating training virtual cluster '{cluster_name}'...") - - cluster = RayVirtualCluster( - name=cluster_name, - bundle_ct_per_node_list=[2], # Single node, 2 gpus - use_gpus=True, - num_gpus_per_node=2, # Using both GPUs - max_colocated_worker_groups=1, # Only one worker group - ) - - # Create separate configs for each policy - config = deepcopy(basic_llama_test_config) - config["generation"] = configure_generation_config(config["generation"], tokenizer) - # Add stop strings for testing - config["generation"]["stop_token_ids"] = [12095, 1112] # ["ĠParis", "..."] - config["generation"]["stop_strings"] = ["the"] - - # Ensure we can get same output - assert config["model_name"] == "Qwen/Qwen3-0.6B", ( - "Model name should be Qwen/Qwen3-0.6B to get expected output" - ) - - # Create policy - policy = Policy(cluster=cluster, config=config, tokenizer=tokenizer) - - # Call prepare_for_generation if available - print("Preparing for generation...") - policy.prepare_for_generation() - - # Generate text - print("Generating text...") - data, _, _ = test_input_data - results = policy.generate(data, greedy=True) - output_ids = results["output_ids"] - - # Call finish_generation if available - print("Finishing generation...") - policy.finish_generation() - - # Check result - generated_texts = tokenizer.batch_decode(output_ids, skip_special_tokens=True) - assert ( - generated_texts - == [ - "Write a story about a magical forest where the", # trees are made of stars and the ground is made of light. The - "Explain how photosynthesis works in the", # context of the environment and the role of the sun in it.\nAnswer - "What are the benefits of exercise? What are the", # risks of exercise? What are the benefits and risks of physical activity - "Describe the water cycle and its importance in the", # environment.\nAnswer:\nThe **water cycle** is a - "What is the capital of France? 
The capital of France is Paris", # . The answer is Paris. The answer is Paris - "Who is the president of the USA? The answer is the", # president of the United States of America, which is the president - "What is the capital of the moon? The answer is...", # ? Let me think. I know that the moon is a - "Where is the sun? Where is the", # moon? Where is the earth? Where is the sun in the - ] - ), "Output should be the same as the expected output" - - # Clean up after the test - print("Cleaning up resources for test") - policy.worker_group.shutdown() - cluster.shutdown() - - -@pytest.mark.timeout(180) -@pytest.mark.parametrize("num_gpus", [2], ids=["2gpu"]) -def test_loss_independent_of_microbatch_size(num_gpus, tokenizer): - """Tests that changing microbatch size while keeping global batch size constant does not affect loss values.""" - - # Create test batch with global batch size of 8 - global_batch_size = 8 - seq_len = 128 - vocab_size = 32000 - - # Create test input_ids and attention_mask - input_ids = torch.randint(0, vocab_size, (global_batch_size, seq_len)) - attention_mask = torch.ones(global_batch_size, seq_len) - input_lengths = attention_mask.sum(dim=1).to(torch.int32) - - # Create data dictionary - data = BatchedDataDict( - { - "input_ids": input_ids, - "input_lengths": input_lengths, - "attention_mask": attention_mask, - "token_mask": torch.triu( - torch.ones(global_batch_size, seq_len), diagonal=1 - ), ## give different examples different numbers of valid tokens - "sample_mask": torch.ones((global_batch_size,)), - "labels": torch.randint(0, vocab_size, (global_batch_size, seq_len)), - "num_valid_tokens_in_batch": torch.tensor( - [seq_len] * global_batch_size, dtype=torch.float32 - ), - "advantages": torch.randn(global_batch_size, seq_len), - "prev_logprobs": torch.randn(global_batch_size, seq_len), - "reference_policy_logprobs": torch.randn(global_batch_size, seq_len), - "generation_logprobs": torch.randn(global_batch_size, seq_len), - } - ) - - # 
Compute loss with microbatching - cluster = RayVirtualCluster( - name=f"test-{num_gpus}gpu", - bundle_ct_per_node_list=[num_gpus], - use_gpus=True, - num_gpus_per_node=num_gpus, - max_colocated_worker_groups=1, - ) - - config = basic_llama_test_config - - print("Creating training Policy...") - policy_mbs1 = Policy( - cluster=cluster, - config=config, - init_reference_model=False, - tokenizer=tokenizer, - ) - # Test NLLLoss and ClippedPGLossFn with mbs=1 - nll_loss_fn = NLLLoss() - pg_loss_fn = ClippedPGLossFn( - { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.1, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } - ) - - # Compute loss with mbs1 - policy_mbs1.prepare_for_training() - mbs1_results = policy_mbs1.train(data, nll_loss_fn) - mbs1_nll_loss = mbs1_results["loss"] - - mbs1_results = policy_mbs1.train(data, pg_loss_fn) - mbs1_pg_loss = mbs1_results["loss"] - - policy_mbs1.worker_group.shutdown() - - # Compute loss with mbs2 - config = basic_llama_test_config - config["train_micro_batch_size"] = 2 - config["generation"] = configure_generation_config(config["generation"], tokenizer) - - print("Creating training Policy...") - policy_mbs2 = Policy( - cluster=cluster, - config=config, - init_reference_model=False, - tokenizer=tokenizer, - ) - - # Compute loss with mbs2 - policy_mbs2.prepare_for_training() - mbs2_results = policy_mbs2.train(data, nll_loss_fn) - mbs2_nll_loss = mbs2_results["loss"] - - mbs2_results = policy_mbs2.train(data, pg_loss_fn) - mbs2_pg_loss = mbs1_results["loss"] - - # Verify NLLLoss is independent of microbatch size - torch.testing.assert_close(mbs1_nll_loss, mbs2_nll_loss) - torch.testing.assert_close(mbs1_pg_loss, mbs2_pg_loss) - - cluster.shutdown() - policy_mbs2.worker_group.shutdown() diff --git a/tests/unit/utils/test_native_checkpoint.py 
b/tests/unit/utils/test_native_checkpoint.py index 7df7f8543b..feca16365d 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -54,7 +54,7 @@ }, }, "dtensor_cfg": { - "enabled": False, + "enabled": True, "cpu_offload": False, "sequence_parallel": False, "activation_checkpointing": False, From 76b6c89ce95f34526a08fe14f5dbbff10319e075 Mon Sep 17 00:00:00 2001 From: Xuehan Xiong Date: Tue, 8 Jul 2025 11:52:44 -0700 Subject: [PATCH 15/59] fix: fix a answer parsing bug in MMLU-Pro. (#598) Signed-off-by: Xuehan Signed-off-by: Jialei Chen --- examples/configs/evals/mmlu_pro.yaml | 15 +++++++ examples/prompts/mmlu_pro.txt | 1 + nemo_rl/environments/math_environment.py | 41 ++++++++++++++++--- .../environments/test_math_environment.py | 32 ++++++++++----- 4 files changed, 72 insertions(+), 17 deletions(-) create mode 100644 examples/configs/evals/mmlu_pro.yaml create mode 100644 examples/prompts/mmlu_pro.txt diff --git a/examples/configs/evals/mmlu_pro.yaml b/examples/configs/evals/mmlu_pro.yaml new file mode 100644 index 0000000000..bbcfecf4cb --- /dev/null +++ b/examples/configs/evals/mmlu_pro.yaml @@ -0,0 +1,15 @@ +# GPQA evaluation Configuration +defaults: "eval.yaml" + +generation: + model_name: "Qwen/Qwen2.5-7B-Instruct" + vllm_cfg: + max_model_len: 3072 + +data: + prompt_file: "examples/prompts/mmlu_pro.txt" + dataset_name: "mmlu_pro" + +env: + math: + verifier_type: "english_multichoice" diff --git a/examples/prompts/mmlu_pro.txt b/examples/prompts/mmlu_pro.txt new file mode 100644 index 0000000000..52d47ccdc0 --- /dev/null +++ b/examples/prompts/mmlu_pro.txt @@ -0,0 +1 @@ +Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of A, B, C, D, E, F, G, H, I, J. Think step by step before answering. 
diff --git a/nemo_rl/environments/math_environment.py b/nemo_rl/environments/math_environment.py index 8dd5247f1c..f23896c25d 100644 --- a/nemo_rl/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -101,7 +101,7 @@ def verify( @ray.remote -class MultichoiceVerifyWorker: +class MultilingualMultichoiceVerifyWorker: def verify( self, pred_responses: list[str], ground_truths: list[str] ) -> list[float]: @@ -133,6 +133,35 @@ def verify( return results +@ray.remote +class EnglishMultichoiceVerifyWorker: + def verify( + self, pred_responses: list[str], ground_truths: list[str] + ) -> list[float]: + """Verify the correctness of the predicted responses against the ground truth. + + Args: + pred_responses: list[str]. The predicted responses from the LLM. + ground_truths: list[str]. The ground truth responses. + + Returns: + list[float]. The rewards for each predicted response. + """ + results = [] + for response, ground_truth in zip(pred_responses, ground_truths): + ground_truth = answer_parsing.normalize_response(ground_truth) + response = answer_parsing.normalize_response(response) + extracted_answer = None + match = re.search("(?i)Answer\s*:[ \t]*([A-Z])", response) + if match: + extracted_answer = answer_parsing.normalize_extracted_answer( + match.group(1) + ) + score = 1.0 if extracted_answer == ground_truth else 0.0 + results.append(score) + return results + + class MathEnvironmentMetadata(TypedDict): ground_truth: str @@ -142,11 +171,11 @@ class MathEnvironment(EnvironmentInterface): def __init__(self, cfg: MathEnvConfig): self.cfg = cfg self.num_workers = cfg["num_workers"] - worker_cls = ( - MultichoiceVerifyWorker - if cfg.get("verifier_type", "math") == "multichoice" - else HFVerifyWorker - ) + worker_cls = { + "math": HFVerifyWorker, + "english_multichoice": EnglishMultichoiceVerifyWorker, + "multilingual_multichoice": MultilingualMultichoiceVerifyWorker, + }[cfg.get("verifier_type", "math")] self.workers = [ worker_cls.options( # 
type: ignore # (decorated with @ray.remote) runtime_env={"py_executable": PY_EXECUTABLES.SYSTEM} diff --git a/tests/unit/environments/test_math_environment.py b/tests/unit/environments/test_math_environment.py index b254f2ef5f..4d46bbba83 100644 --- a/tests/unit/environments/test_math_environment.py +++ b/tests/unit/environments/test_math_environment.py @@ -43,8 +43,9 @@ def math_env(): @pytest.fixture(scope="module") -def multichoice_env(): +def multichoice_env(request): """Create a MathEnvironment actor for testing.""" + verifier_type = request.param env = MathEnvironment.options( runtime_env={ "py_executable": get_actor_python_env( @@ -52,7 +53,7 @@ def multichoice_env(): ), "env_vars": dict(os.environ), } - ).remote({"num_workers": 2, "verifier_type": "multichoice"}) + ).remote({"num_workers": 2, "verifier_type": verifier_type}) yield env # Clean up the actor and wait for it to be killed env.shutdown.remote() @@ -88,8 +89,9 @@ def basic_test_data(): @pytest.fixture -def basic_multichoice_test_data(): +def multichoice_test_data(request): """Common test data for basic multichoice problems.""" + answer_key = request.param return { "message_log_batch": [ [ @@ -97,21 +99,21 @@ def basic_multichoice_test_data(): "role": "user", "content": "Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD", }, - {"role": "assistant", "content": "\nAnswer: C"}, + {"role": "assistant", "content": f"\n{answer_key}: C"}, ], [ { "role": "user", "content": "Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD", }, - {"role": "assistant", "content": "\nAnswer: B"}, + {"role": "assistant", "content": f"\n{answer_key}: B"}, ], [ { "role": "user", "content": "Answer the following multiple choice question. 
The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD", }, - {"role": "assistant", "content": "\nAnswer: D"}, + {"role": "assistant", "content": f"\n{answer_key}: D"}, ], ], "metadata": [ @@ -202,12 +204,20 @@ def test_math_env_step_basic(math_env, basic_test_data): assert all(result.terminateds == 1.0), "All terminated flags should be 1.0" -def test_multichoice_env_step_basic(multichoice_env, basic_multichoice_test_data): +@pytest.mark.parametrize( + "multichoice_env, multichoice_test_data", + [ + ("english_multichoice", "Answer"), + ("multilingual_multichoice", "答案"), + ], + indirect=True, +) +def test_multichoice_env_step_basic(multichoice_env, multichoice_test_data): """Test basic functionality of MathEnvironment step with multichoice verifier.""" result = ray.get( multichoice_env.step.remote( - basic_multichoice_test_data["message_log_batch"], - basic_multichoice_test_data["metadata"], + multichoice_test_data["message_log_batch"], + multichoice_test_data["metadata"], ) ) @@ -227,7 +237,7 @@ def test_multichoice_env_step_basic(multichoice_env, basic_multichoice_test_data # Check metadata assert len(result.metadata) == 3, "Should return metadata for all 3 messages" - assert result.metadata == basic_multichoice_test_data["metadata"], ( + assert result.metadata == multichoice_test_data["metadata"], ( "Metadata should be unchanged" ) @@ -236,7 +246,7 @@ def test_multichoice_env_step_basic(multichoice_env, basic_multichoice_test_data assert all(result.rewards[:2] == 1.0), ( "The first two rewards should be 1.0 for correct answers" ) - assert result.rewards[2] == 0.0, "The thrid reward should be 0.0 for wrong answer" + assert result.rewards[2] == 0.0, "The third reward should be 0.0 for wrong answer" assert result.terminateds.shape == (3,), ( "Terminated flags should be a tensor of shape (3,)" ) From 87112496e814b6740c3edc2ab2243b260b323072 Mon Sep 17 00:00:00 2001 From: Xuehan Xiong Date: 
Tue, 8 Jul 2025 11:52:50 -0700 Subject: [PATCH 16/59] feat: add MMMLU eval benchmark. (#596) Signed-off-by: Xuehan Signed-off-by: Jialei Chen --- docs/guides/eval.md | 2 +- examples/configs/evals/mmlu.yaml | 13 ++++++++++++ examples/configs/evals/mmlu_zh_cn.yaml | 6 ++++++ nemo_rl/data/eval_datasets/__init__.py | 18 +++++++++++----- nemo_rl/data/eval_datasets/mmlu.py | 29 +++++++++++++++++++++++--- 5 files changed, 59 insertions(+), 9 deletions(-) create mode 100644 examples/configs/evals/mmlu.yaml create mode 100644 examples/configs/evals/mmlu_zh_cn.yaml diff --git a/docs/guides/eval.md b/docs/guides/eval.md index b4f97b8c64..250c84f189 100644 --- a/docs/guides/eval.md +++ b/docs/guides/eval.md @@ -92,6 +92,6 @@ score=0.1000 (3.0/30) - [AIME-2024](../../nemo_rl/data/eval_datasets/aime2024.py) - [GPQA and GPQA-diamond](../../nemo_rl/data/eval_datasets/gpqa.py) - [MATH and MATH-500](../../nemo_rl/data/eval_datasets/math.py) -- [MMLU](../../nemo_rl/data/eval_datasets/mmlu.py) +- [MMLU](../../nemo_rl/data/eval_datasets/mmlu.py): this also includes MMMLU (Multilingual MMLU), a total of 14 languages. 
- [MMLU-Pro](../../nemo_rl/data/eval_datasets/mmlu_pro.py) diff --git a/examples/configs/evals/mmlu.yaml b/examples/configs/evals/mmlu.yaml new file mode 100644 index 0000000000..179e54d9fb --- /dev/null +++ b/examples/configs/evals/mmlu.yaml @@ -0,0 +1,13 @@ +# MMLU evaluation Configuration +defaults: "eval.yaml" + +generation: + model_name: "Qwen/Qwen2.5-7B-Instruct" + +data: + prompt_file: "examples/prompts/mmlu.txt" + dataset_name: "mmlu" + +env: + math: + verifier_type: "multichoice" diff --git a/examples/configs/evals/mmlu_zh_cn.yaml b/examples/configs/evals/mmlu_zh_cn.yaml new file mode 100644 index 0000000000..ee3cd9bc30 --- /dev/null +++ b/examples/configs/evals/mmlu_zh_cn.yaml @@ -0,0 +1,6 @@ +# MMLU_ZH-CN evaluation Configuration +defaults: "mmlu.yaml" + +data: + dataset_name: "mmlu_ZH-CN" + diff --git a/nemo_rl/data/eval_datasets/__init__.py b/nemo_rl/data/eval_datasets/__init__.py index 2e5ba97974..e99a7c6af2 100644 --- a/nemo_rl/data/eval_datasets/__init__.py +++ b/nemo_rl/data/eval_datasets/__init__.py @@ -23,11 +23,19 @@ def load_eval_dataset(data_config): """Loads evaluation dataset.""" dataset_name = data_config["dataset_name"] - if dataset_name == "mmlu": - base_dataset = MMLUDataset( - prompt_file=data_config["prompt_file"], - system_prompt_file=data_config["system_prompt_file"], - ) + if dataset_name.startswith("mmlu") and dataset_name != "mmlu_pro": + if dataset_name == "mmlu": + base_dataset = MMLUDataset( + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + else: + language = dataset_name.split("_")[1] + base_dataset = MMLUDataset( + language=language, + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) elif dataset_name == "aime2024": base_dataset = AIME2024Dataset( prompt_file=data_config["prompt_file"], diff --git a/nemo_rl/data/eval_datasets/mmlu.py b/nemo_rl/data/eval_datasets/mmlu.py index f8b75d3b56..c9a373fc10 100644 --- 
a/nemo_rl/data/eval_datasets/mmlu.py +++ b/nemo_rl/data/eval_datasets/mmlu.py @@ -14,7 +14,7 @@ """MMLU dataset and its variants.""" -from typing import Any, Optional +from typing import Any, Literal, Optional from datasets import load_dataset @@ -25,18 +25,41 @@ class MMLUDataset: def __init__( self, + language: Literal[ + "AR-XY", + "BN-BD", + "DE-DE", + "EN-US", + "ES-LA", + "FR-FR", + "HI-IN", + "ID-ID", + "IT-IT", + "JA-JP", + "KO-KR", + "PT-BR", + "ZH-CN", + "SW-KE", + "YO-NG", + ] = "EN-US", prompt_file: Optional[str] = None, system_prompt_file: Optional[str] = None, ): + if language != "EN-US": + data_files = f"https://openaipublic.blob.core.windows.net/simple-evals/mmlu_{language}.csv" + else: + data_files = ( + "https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv" + ) ds = load_dataset( "csv", - data_files="https://openaipublic.blob.core.windows.net/simple-evals/mmlu.csv", + data_files=data_files, split="train", ) self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) self.task_spec = TaskDataSpec( - task_name="MMLU", + task_name=f"MMLU_{language}", prompt_file=prompt_file, system_prompt_file=system_prompt_file, ) From 0ce6470f32972d7d61e71023d707c84a91167d2b Mon Sep 17 00:00:00 2001 From: Felipe Vieira Frujeri Date: Tue, 8 Jul 2025 16:19:18 -0700 Subject: [PATCH 17/59] fix: pytest_sessionfinish hook in case there is no _unit_test_data. 
(#628) Signed-off-by: Felipe Vieira Frujeri Signed-off-by: Jialei Chen --- tests/unit/conftest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 9e4d1a7800..3197fa2d57 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -199,6 +199,9 @@ def log_max_mem(self, metric_name: str): def pytest_sessionfinish(session, exitstatus): + if not hasattr(session.config, "_unit_test_data"): + return + data = session.config._unit_test_data data["exit_status"] = exitstatus print(f"\nSaving unit test data to {UNIT_RESULTS_FILE}") From adf167cd8d5f471b94c378d75433e704acafa15c Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 8 Jul 2025 16:27:19 -0700 Subject: [PATCH 18/59] fix: Don't call broadcast on dtensor (#627) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- README.md | 2 +- nemo_rl/models/policy/dtensor_policy_worker.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 45900c78f0..2e59d3b191 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ HF_HOME=/path/to/hf_home huggingface-cli download Qwen/Qwen2.5-32B # Ensure HF_HOME is included in your MOUNTS HF_HOME=/path/to/hf_home \ -COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \ +COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 
cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True policy.dynamic_batching.train_mb_tokens=16384 policy.dynamic_batching.logprob_mb_tokens=32768 checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \ CONTAINER=YOUR_CONTAINER \ MOUNTS="$PWD:$PWD" \ sbatch \ diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 379ce8ba52..5249f88f90 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -263,7 +263,7 @@ def __init__( # Manually broadcast buffers for _, buf in self.model.named_buffers(): - torch.distributed.broadcast(buf, src=0) + torch.distributed.broadcast(to_local_if_dtensor(buf), src=0) if self.cpu_offload: self.model = self.move_to_device(self.model, "cpu") From c2384bbdfdc69fff05553f05002444c8b6f4410b Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 8 Jul 2025 16:31:23 -0700 Subject: [PATCH 19/59] fix: Fix eval when using async engine (#626) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- .github/workflows/cicd-main.yml | 1 + nemo_rl/evals/eval.py | 63 +++++++++- nemo_rl/models/generation/vllm.py | 192 ++++++++++++++++++++++++++++-- tests/functional/eval_async.sh | 31 +++++ 4 files changed, 271 insertions(+), 16 deletions(-) create mode 100644 tests/functional/eval_async.sh diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index e3cf6cc42b..3000bd5cfe 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -230,6 +230,7 @@ jobs: time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh time uv run --no-sync bash ./tests/functional/dpo.sh time uv run --no-sync bash ./tests/functional/eval.sh + time uv run --no-sync bash ./tests/functional/eval_async.sh 
time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh else echo Skipping functional tests for level ${{ needs.pre-flight.outputs.test_level }} diff --git a/nemo_rl/evals/eval.py b/nemo_rl/evals/eval.py index 5788e1971e..d809b98199 100644 --- a/nemo_rl/evals/eval.py +++ b/nemo_rl/evals/eval.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os from typing import TypedDict @@ -201,6 +202,25 @@ def run_env_eval(vllm_generation, dataloader, env, master_config): env: Environment that scores responses. master_config: Configuration settings. """ + # Check if async engine is enabled and run appropriate version + if master_config["generation"]["vllm_cfg"]["async_engine"]: + asyncio.run( + _run_env_eval_impl( + vllm_generation, dataloader, env, master_config, use_async=True + ) + ) + else: + asyncio.run( + _run_env_eval_impl( + vllm_generation, dataloader, env, master_config, use_async=False + ) + ) + + +async def _run_env_eval_impl( + vllm_generation, dataloader, env, master_config, use_async=False +): + """Unified implementation for both sync and async evaluation.""" # Extract for easier access generation_config = master_config["generation"] eval_config = master_config["eval"] @@ -224,7 +244,7 @@ def run_env_eval(vllm_generation, dataloader, env, master_config): # generate by vllm inputs = BatchedDataDict({"prompts": prompts}) - outputs = vllm_generation.generate_text(inputs)["texts"] + outputs = await _generate_texts(vllm_generation, inputs, use_async) # append to message_log for idx, output in enumerate(outputs): @@ -253,17 +273,54 @@ def run_env_eval(vllm_generation, dataloader, env, master_config): vllm_generation.shutdown() # Print results + _print_results( + master_config, + generation_config, + score, + len(dataloader.dataset), + metric, + pass_k_value, + num_tests_per_prompt, + ) + + +async def _generate_texts(vllm_generation, inputs, 
use_async): + """Generate texts using either sync or async method.""" + if use_async: + # Use async generation - collect all results + results = [] + async for idx, result in vllm_generation.generate_text_async(inputs): + results.append((idx, result["texts"][0])) + + # Sort by index to maintain order + results.sort(key=lambda x: x[0]) + return [text for _, text in results] + else: + # Use sync generation + return vllm_generation.generate_text(inputs)["texts"] + + +def _print_results( + master_config, + generation_config, + score, + dataset_size, + metric, + pass_k_value, + num_tests_per_prompt, +): + """Print evaluation results.""" dataset_name = os.path.basename(master_config["data"]["dataset_name"]) model_name = os.path.basename(generation_config["model_name"]) max_new_tokens = generation_config["vllm_cfg"]["max_model_len"] temperature = generation_config["temperature"] top_p = generation_config["top_p"] top_k = generation_config["top_k"] - average_score = score / len(dataloader.dataset) + average_score = score / dataset_size print("\n" + "=" * 60) print(f"{model_name=} {dataset_name=}") print(f"{max_new_tokens=} {temperature=} {top_p=} {top_k=}\n") print(f"{metric=} {pass_k_value=} {num_tests_per_prompt=}\n") - print(f"score={average_score:.4f} ({score}/{len(dataloader.dataset)})") + print(f"score={average_score:.4f} ({score}/{dataset_size})") print("=" * 60 + "\n") diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 9d7c7873e9..7a1be51c19 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -799,6 +799,12 @@ def generate_text( BatchedDataDict containing: - texts: List of generated text responses """ + # Check if async engine is enabled + if self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_text cannot be used with async_engine=True. Use generate_text_async instead." 
+ ) + # Extract stop_strings if provided, else use default from config batch_stop_strings: list[list[str] | None] = data.get( "stop_strings", [self.cfg.get("stop_strings")] * len(data["prompts"]) @@ -843,6 +849,107 @@ def generate_text( ) return return_data + async def generate_text_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate text responses asynchronously, yielding results as they are ready. + + Args: + data: BatchedDataDict containing prompts with text strings + greedy: Whether to use greedy decoding instead of sampling + + Yields: + Tuple of (original_index, BatchedDataDict containing single text response) + """ + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_text_async can only be used when async_engine is enabled in vLLM config." + ) + + # Handle empty input case + if len(data["prompts"]) == 0: + return + + prompts = data["prompts"] + batch_size = len(prompts) + + # Extract stop_strings if provided, else use default from config + batch_stop_strings: list[list[str] | None] = data.get( + "stop_strings", [self.cfg.get("stop_strings")] * batch_size + ) + + # Create tasks for each prompt + async def process_single_prompt(prompt_idx): + """Process a single prompt and return the result.""" + prompt = prompts[prompt_idx] + + # Get stop strings for this specific prompt + per_prompt_stop_strings = None + if batch_stop_strings and prompt_idx < len(batch_stop_strings): + per_prompt_stop_strings = batch_stop_strings[prompt_idx] + + # Merge stop strings + final_stop_strings = self._merge_stop_strings( + [per_prompt_stop_strings] if per_prompt_stop_strings else None + ) + + # Create sampling parameters + top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 + sampling_params = self.SamplingParams( + temperature=self.cfg["temperature"] if not greedy else 0, + top_p=self.cfg["top_p"], + top_k=top_k if not 
greedy else 1, + max_tokens=self.cfg["max_new_tokens"], + stop_token_ids=self.cfg["stop_token_ids"], + stop=final_stop_strings, + include_stop_str_in_output=True, # returning stop strings like hf + ) + + request_id = str(uuid.uuid4()) + + # Generate using vLLM async engine + vllm_request_generator = self.llm.generate( + prompt=prompt, + sampling_params=sampling_params, + request_id=request_id, + ) + + # Get the final result from the generator + final_request_output = None + async for req_output in vllm_request_generator: + final_request_output = req_output + + if final_request_output is None: + raise RuntimeError(f"No output received for request {request_id}") + + # Extract the generated text + generated_text = final_request_output.outputs[0].text + + # Create result in BatchedDataDict format + result_batch = BatchedDataDict[GenerationOutputSpec]( + {"texts": [generated_text]} + ) + + return (prompt_idx, result_batch) + + # Create tasks for all prompts and yield results as they complete + prompt_tasks = [ + asyncio.create_task(process_single_prompt(i)) for i in range(batch_size) + ] + + # Yield results as they become available + for completed_task in asyncio.as_completed(prompt_tasks): + try: + result = await completed_task + yield result + except Exception as e: + # Cancel remaining tasks + for task in prompt_tasks: + if not task.done(): + task.cancel() + await asyncio.gather(*prompt_tasks, return_exceptions=True) + raise e + def shutdown(self) -> bool: """Clean up vLLM resources.""" try: @@ -1482,6 +1589,12 @@ def generate_text( f"data must be a BatchedDataDict, got type: {type(data)}" ) + # Check if async engine is enabled + if self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_text cannot be used with async_engine=True. Use generate_text_async instead." 
+ ) + # Shard the data across the tied worker groups dp_size = self.sharding_annotations.get_axis_size("data_parallel") sharded_data: list[SlicedDataDict] = data.shard_by_batch_size( @@ -1514,28 +1627,35 @@ def generate_text( return combined - async def generate_async( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + async def _async_generate_base( + self, + data: BatchedDataDict[GenerationDatumSpec], + method_name: str, + data_validation_fn, + greedy: bool = False, ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Generate responses asynchronously, yielding individual samples as they complete. + """Base async generation method that handles common worker management logic. - This method provides per-sample streaming across all workers, yielding each - sample result as soon as it's ready, regardless of which worker processed it. + Args: + data: Input data for generation + method_name: Name of the worker method to call ('generate_async' or 'generate_text_async') + data_validation_fn: Function to validate input data + greedy: Whether to use greedy decoding + + Yields: + Tuple of (original_index, BatchedDataDict containing generation result) """ if not self.cfg["vllm_cfg"]["async_engine"]: raise RuntimeError( - "generate_async can only be used when async_engine is enabled in VllmConfig." + f"{method_name} can only be used when async_engine is enabled in vLLM config." 
) assert isinstance(data, BatchedDataDict), ( f"data must be a BatchedDataDict, got type: {type(data)}" ) - assert "input_ids" in data and "input_lengths" in data, ( - "input_ids and input_lengths are required in data for vLLM generation" - ) - # Handle empty input case - if len(data["input_ids"]) == 0: + # Validate input data and handle empty case + if not data_validation_fn(data): return # Determine the leader worker for the current data parallel shard @@ -1543,9 +1663,9 @@ async def generate_async( self.current_generate_dp_shard_idx ) - # Run the generate_async method on the selected leader worker. This returns an ObjectRefGenerator. + # Run the async method on the selected leader worker worker_gen_proxy = self.worker_group.run_single_worker_single_data( - method_name="generate_async", + method_name=method_name, worker_idx=leader_worker_idx, data=data, greedy=greedy, @@ -1629,6 +1749,52 @@ async def consume_worker_generator(worker_idx, worker_gen): f"Worker task {leader_worker_idx} should be done but isn't" ) + async def generate_text_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate text responses asynchronously, yielding results as they are ready. 
+ + Args: + data: BatchedDataDict containing prompts with text strings + greedy: Whether to use greedy decoding instead of sampling + + Yields: + Tuple of (original_index, BatchedDataDict containing single text response) + """ + + def validate_text_data(data): + if len(data["prompts"]) == 0: + return False # Return False for empty case to trigger early return + return True + + async for result in self._async_generate_base( + data, "generate_text_async", validate_text_data, greedy + ): + yield result + + async def generate_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate responses asynchronously, yielding individual samples as they complete. + + This method provides per-sample streaming across all workers, yielding each + sample result as soon as it's ready, regardless of which worker processed it. + """ + + def validate_generate_data(data): + if "input_ids" not in data or "input_lengths" not in data: + raise AssertionError( + "input_ids and input_lengths are required in data for vLLM generation" + ) + if len(data["input_ids"]) == 0: + return False # Return False for empty case to trigger early return + return True + + async for result in self._async_generate_base( + data, "generate_async", validate_generate_data, greedy + ): + yield result + def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool: """Wake workers up for colocated inference.""" # non-colocated no need to wake up diff --git a/tests/functional/eval_async.sh b/tests/functional/eval_async.sh new file mode 100644 index 0000000000..55a89ef012 --- /dev/null +++ b/tests/functional/eval_async.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
+# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run $PROJECT_ROOT/examples/run_eval.py \ + cluster.gpus_per_node=2 \ + generation.vllm_cfg.async_engine=True \ + generation.vllm_cfg.pipeline_parallel_size=2 \ + $@ \ + 2>&1 | tee $RUN_LOG + +cat $RUN_LOG | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'data["score"] == 0.1' \ From 299bf0cfa2bd6eb50671a6b30bb6af58ecd50f56 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Tue, 8 Jul 2025 21:37:29 -0700 Subject: [PATCH 20/59] feat: Megatron MoE Support (#590) Signed-off-by: Yi-Fu Wu Signed-off-by: Anna Shors Co-authored-by: Guyue Huang Co-authored-by: Anna Shors Co-authored-by: Parth Chadha Signed-off-by: Jialei Chen --- 3rdparty/NeMo-workspace/NeMo | 2 +- examples/configs/dpo.yaml | 6 + examples/configs/grpo_math_1B_megatron.yaml | 6 + ...po-llama3.1-8b-instruct-4n8g-megatron.yaml | 6 + ...8b-instruct-4n8g-megatrontp2pp2-quick.yaml | 6 + ...ft-llama3.1-8b-instruct-1n8g-megatron.yaml | 6 + examples/configs/sft.yaml | 6 + nemo_rl/algorithms/grpo.py | 4 + nemo_rl/models/generation/vllm_backend.py | 1 - .../models/megatron/converters/__init__.py | 6 +- nemo_rl/models/megatron/converters/common.py | 65 +++++- nemo_rl/models/megatron/refit_utils.py | 91 ++++---- .../models/policy/megatron_policy_worker.py | 194 +++++++++++++++--- .../models/generation/test_vllm_generation.py | 6 + .../models/policy/test_megatron_worker.py | 6 + 15 files changed, 338 insertions(+), 73 deletions(-) diff --git a/3rdparty/NeMo-workspace/NeMo b/3rdparty/NeMo-workspace/NeMo index 
4b7ded58d8..0e0894300e 160000 --- a/3rdparty/NeMo-workspace/NeMo +++ b/3rdparty/NeMo-workspace/NeMo @@ -1 +1 @@ -Subproject commit 4b7ded58d804bf3470499c6cfa385c6fa915879d +Subproject commit 0e0894300e09aca042bc07859f660f22858f0a9f diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 25f5b59ef2..110729a966 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -92,12 +92,18 @@ policy: empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 1 context_parallel_size: 1 pipeline_dtype: ${policy.precision} num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true + freeze_moe_router: false + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "aux_loss" + moe_router_bias_update_rate: 1e-3 #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index e9bcbf20b8..e6dbc8f18e 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -73,12 +73,18 @@ policy: activation_checkpointing: false converter_type: "Qwen2ForCausalLM" tensor_model_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 1 num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null context_parallel_size: 1 pipeline_dtype: ${policy.precision} sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git 
a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml index 5b2b073691..6139ce6788 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml @@ -49,12 +49,18 @@ policy: empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 1 context_parallel_size: 1 pipeline_dtype: ${policy.precision} num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true + freeze_moe_router: false + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "aux_loss" + moe_router_bias_update_rate: 1e-3 #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 9388e8ed6f..733cce01da 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -49,12 +49,18 @@ policy: empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 2 context_parallel_size: 1 pipeline_dtype: ${policy.precision} num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: true + freeze_moe_router: false + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "aux_loss" + moe_router_bias_update_rate: 1e-3 #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git 
a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml index 68f0d177cd..585ea7cafa 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml @@ -37,12 +37,18 @@ policy: empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 2 context_parallel_size: 1 pipeline_dtype: ${policy.precision} num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: false + freeze_moe_router: false + moe_router_dtype: null + moe_router_load_balancing_type: "aux_loss" + moe_router_bias_update_rate: 1e-3 #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 4127411af6..01c9c2c452 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -67,12 +67,18 @@ policy: empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 pipeline_model_parallel_size: 2 context_parallel_size: 1 pipeline_dtype: ${policy.precision} num_layers_in_first_pipeline_stage: null num_layers_in_last_pipeline_stage: null sequence_parallel: false + freeze_moe_router: false + moe_router_dtype: null + moe_router_load_balancing_type: "aux_loss" + moe_router_bias_update_rate: 1e-3 #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 29b08f7bdb..29e3ba2773 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -428,6 +428,10 @@ def refit_policy_generation( grouped_param_keys = policy.prepare_weights_for_ipc( 
_refit_buffer_size_gb=_refit_buffer_size_gb ) + total_num_keys = sum(len(k) for k in grouped_param_keys) + print( + f"[Refit] Split {total_num_keys} keys into {len(grouped_param_keys)} groups" + ) # do update for keys in grouped_param_keys: ipc_handles = policy.get_weights_ipc_handles(keys) diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index 8fe204e21f..8aaa34cd15 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -96,7 +96,6 @@ def update_weights_from_ipc_handles(self, ipc_handles): # Load weights into the model self.model_runner.model.load_weights(weights=weights) - torch.cuda.synchronize() return True except Exception as e: print( diff --git a/nemo_rl/models/megatron/converters/__init__.py b/nemo_rl/models/megatron/converters/__init__.py index 35e6293153..3563de4959 100644 --- a/nemo_rl/models/megatron/converters/__init__.py +++ b/nemo_rl/models/megatron/converters/__init__.py @@ -13,11 +13,15 @@ # limitations under the License. from .common import ( + get_global_expert_num, get_global_layer_num, + get_local_expert_num, get_local_layer_num, ) __all__ = [ - "get_local_layer_num", + "get_global_expert_num", "get_global_layer_num", + "get_local_expert_num", + "get_local_layer_num", ] diff --git a/nemo_rl/models/megatron/converters/common.py b/nemo_rl/models/megatron/converters/common.py index c6591645f1..d8999fc014 100644 --- a/nemo_rl/models/megatron/converters/common.py +++ b/nemo_rl/models/megatron/converters/common.py @@ -45,6 +45,15 @@ def get_local_layer_num(s): return number +def get_local_expert_num(s): + """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'.""" + segments = s.split(".") + if "experts" not in segments or segments[-1] == "_extra_state": + return None + number = int(segments[-1].strip("weight")) + return number + + def get_global_layer_num(s, cfg): """Assumes layer number is preceeded by 'layers.'. 
@@ -79,6 +88,23 @@ def get_global_layer_num(s, cfg): return global_offset + local_layer_num +def get_global_expert_num(s, cfg): + """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'. + + Assumes expert model parallel size is set. + In the state dict, the expert number is the local expert number (expert local). + This function converts the local expert number to the global expert number. + """ + local_expert_num = get_local_expert_num(s) + global_expert_num = ( + parallel_state.get_expert_model_parallel_rank() + * cfg.num_moe_experts + // parallel_state.get_expert_model_parallel_world_size() + + local_expert_num + ) + return global_expert_num + + def get_global_key_from_local_key(local_key, model_cfg): local_layer = get_local_layer_num(local_key) if local_layer is not None: @@ -87,6 +113,11 @@ def get_global_key_from_local_key(local_key, model_cfg): global_key = re.sub(r"(?<=layers\.)\d+", str(global_layer), local_key, count=1) else: global_key = local_key + local_expert = get_local_expert_num(global_key) + if local_expert is not None: + global_expert = get_global_expert_num(global_key, model_cfg) + # Replace the last occurrence of the digits after "weight" with the global expert number. 
+ global_key = re.sub(r"(?<=weight)\d+", str(global_expert), global_key) return global_key @@ -97,7 +128,6 @@ def split_fc1_tp(ctx: TransformCTX, linear_fc1: torch.Tensor): # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) # [ up_tp1 ] [ up_tp1 ] megatron_config = ctx.source.config - # TODO: handle expert_tensor_parallel_size tp = megatron_config.tensor_model_parallel_size linear_fc1 = einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=tp) mlp_gate_proj_weight = linear_fc1[0] @@ -105,6 +135,20 @@ def split_fc1_tp(ctx: TransformCTX, linear_fc1: torch.Tensor): return mlp_gate_proj_weight, mlp_up_proj_weight +def split_fc1_etp(ctx: TransformCTX, linear_fc1: torch.Tensor): + # gate proj and up proj are mixed right now, and we need to reshape them + # [ gate_tp0 ] [ gate_tp0 ] + # [ up_tp0 ] --\ [ gate_tp1 ] --\ (split gate) + # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) + # [ up_tp1 ] [ up_tp1 ] + megatron_config = ctx.source.config + etp = megatron_config.expert_tensor_parallel_size + linear_fc1 = einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=etp) + mlp_gate_proj_weight = linear_fc1[0] + mlp_up_proj_weight = linear_fc1[1] + return mlp_gate_proj_weight, mlp_up_proj_weight + + def split_qkv_gpu(ctx: TransformCTX, linear_qkv: torch.Tensor): """Split interleave-concatenated qkv to q, k, v. 
@@ -177,8 +221,13 @@ def update_transforms_for_nemorl(export_transforms): # In place update for transform in export_transforms: if transform.transform.__name__ == "split_fc1": - # Need to modify this transform to take into account the TP size - transform.transform = split_fc1_tp + if ( + "experts" in transform.source_key + and "shared_experts" not in transform.source_key + ): + transform.transform = split_fc1_etp + else: + transform.transform = split_fc1_tp elif transform.transform.__name__ == "split_qkv": # This transform previously moved qkv weights to cpu transform.transform = split_qkv_gpu @@ -211,7 +260,15 @@ def __init__(self, hf_model_name, megatron_model): ) pp_gathered_global_keys = list({k for l in pp_gathered_global_keys for k in l}) # type: ignore - global_keys = pp_gathered_global_keys + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = parallel_state.get_expert_model_parallel_world_size() + ep_gathered_global_keys = [None] * ep_world_size + torch.distributed.all_gather_object( + ep_gathered_global_keys, pp_gathered_global_keys, group=ep_group + ) + ep_gathered_global_keys = list({k for l in ep_gathered_global_keys for k in l}) + + global_keys = ep_gathered_global_keys global_keys_map = {k: None for k in global_keys} if "qwen" in hf_model_name.lower(): diff --git a/nemo_rl/models/megatron/refit_utils.py b/nemo_rl/models/megatron/refit_utils.py index b0792a6e93..fb46030ce9 100644 --- a/nemo_rl/models/megatron/refit_utils.py +++ b/nemo_rl/models/megatron/refit_utils.py @@ -13,6 +13,7 @@ # limitations under the License. 
import re import time +from typing import Dict, List import torch from megatron.core import parallel_state @@ -28,8 +29,6 @@ VocabParallelEmbedding, ) -from nemo_rl.models.megatron.converters.common import get_global_key_from_local_key - def get_tp_dim(model, param_name, named_modules_dict): # pass in named_modules_dict so we can get it ahead of time instead @@ -46,7 +45,6 @@ def get_tp_dim(model, param_name, named_modules_dict): key = prefix + ".".join(param_name.split(".")[:-1]) module = named_modules_dict.get(key) if module is None: - print(f"Module {key} not found in named_modules_dict") return None if hasattr(module, "parallel_mode") and module.parallel_mode is not None: # TE layers sometimes have parallel_mode we can check directly @@ -75,68 +73,85 @@ def get_tp_dim(model, param_name, named_modules_dict): @torch.no_grad() -def gather_params( - model, - keys, -): - st = time.time() +def gather_params(model, keys, key_to_global_keys: Dict[str, List[str]]): + st = time.perf_counter() tp_group = parallel_state.get_tensor_model_parallel_group() tp_world_size = torch.distributed.get_world_size(tp_group) + etp_group = parallel_state.get_expert_tensor_parallel_group() + etp_world_size = torch.distributed.get_world_size(etp_group) pp_group = parallel_state.get_pipeline_model_parallel_group() pp_world_size = torch.distributed.get_world_size(pp_group) + pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) + pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) named_modules_dict = dict(model.named_modules()) state_dict = model.state_dict() gathered_params = {} - for local_key, shape, dtype in sorted(keys): - if local_key in state_dict: + ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") + + for local_key, owner_pp_local_rank_id, shape, dtype in sorted(keys): + if local_key in state_dict and 
owner_pp_local_rank_id == pp_local_rank_id: param = state_dict[local_key] - # Check if param is TP-sharded tp_dim = get_tp_dim(model, local_key, named_modules_dict) # If the parameter is TP-sharded, gather its slices on GPU. if tp_dim is not None: - gathered_slices = [ - torch.empty_like(param) for _ in range(tp_world_size) - ] - torch.distributed.all_gather(gathered_slices, param, group=tp_group) - # TODO: why cast to torch.bfloat16 instead of param.dtype? + if ep_pattern.search(local_key): + world_size = etp_world_size + group = etp_group + else: + world_size = tp_world_size + group = tp_group + + gathered_slices = [torch.empty_like(param) for _ in range(world_size)] + torch.distributed.all_gather(gathered_slices, param, group=group) full_param = torch.cat(gathered_slices, dim=tp_dim) else: - # TODO: why do we need to clone? full_param = param - global_key = get_global_key_from_local_key(local_key, model.config) else: - # params that may not be on every rank, e.g. the embedding layer - global_key = None full_param = torch.empty( *shape, dtype=dtype, device=torch.cuda.current_device() ) - # gather across PP group - pp_gathered_global_keys = [None] * pp_world_size - torch.distributed.all_gather_object( - pp_gathered_global_keys, global_key, group=pp_group - ) - # To test no gather: - # pp_gathered_global_keys = [global_key] * pp_world_size - - pp_gathered_params = [ - torch.empty(*shape, dtype=dtype, device=torch.cuda.current_device()) - for _ in range(pp_world_size) - ] - torch.distributed.all_gather(pp_gathered_params, full_param, group=pp_group) + # Broadcast across PP group. 
+ src_global_rank = pp_global_ranks[owner_pp_local_rank_id] + + # Broadcast from the rank that has the parameter + torch.distributed.broadcast(full_param, src=src_global_rank, group=pp_group) + pp_gathered_params = [full_param] + + # gather across EP group + if ep_pattern.search(local_key): + stacked_pp_gathered_params = torch.stack(pp_gathered_params) + + ep_gathered_params = [ + torch.empty( + stacked_pp_gathered_params.shape, + dtype=dtype, + device=torch.cuda.current_device(), + ) + for _ in range(ep_world_size) + ] + torch.distributed.all_gather( + ep_gathered_params, stacked_pp_gathered_params, group=ep_group + ) + flat_gathered_params = [ + x for y in ep_gathered_params for x in torch.unbind(y) + ] - flat_gathered_global_keys = pp_gathered_global_keys - flat_gathered_params = pp_gathered_params + else: + flat_gathered_params = pp_gathered_params + flat_gathered_global_keys = key_to_global_keys[ + (local_key, owner_pp_local_rank_id) + ] for k, p in zip(flat_gathered_global_keys, flat_gathered_params): if k is not None: gathered_params[k] = p - torch.cuda.empty_cache() - torch.cuda.synchronize() - print(f"Time taken to gather params: {time.time() - st}") + print(f"Time taken to gather params: {time.perf_counter() - st}") return gathered_params diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 368ce72dc2..778cb4b857 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -13,12 +13,13 @@ # limitations under the License. 
import gc import os +import re import time import warnings from collections import defaultdict from contextlib import AbstractContextManager, contextmanager, nullcontext from functools import partial -from typing import Any, Iterator, Optional, TypeVar +from typing import Any, Iterator, List, Optional, Tuple, TypeVar import ray import torch @@ -101,6 +102,7 @@ from nemo_rl.models.megatron.community_import import import_model_from_hf_name from nemo_rl.models.megatron.converters.common import ( MegatronToHFConverter, + get_global_key_from_local_key, ) from nemo_rl.models.megatron.refit_utils import ( gather_params, @@ -172,6 +174,16 @@ def setup_megatron_model( torch.distributed.barrier() + model_post_init_fns = [] + if policy_cfg["megatron_cfg"]["freeze_moe_router"]: + + def freeze_moe_router(model_module): + for layer in model_module.decoder.layers: + if hasattr(layer.mlp, "router"): + layer.mlp.router.weight.requires_grad = False + + model_post_init_fns.append(freeze_moe_router) + # Model, optimizer, and learning rate. 
model = get_model_from_config( cfg.model_config, @@ -179,6 +191,7 @@ def setup_megatron_model( use_torch_fsdp2=cfg.dist_config.use_torch_fsdp2, overlap_param_gather_with_optimizer_step=cfg.optimizer_config.overlap_param_gather_with_optimizer_step, data_parallel_random_init=cfg.rng_config.data_parallel_random_init, + model_post_init_fns=model_post_init_fns, ) if load_optimizer: optimizer, scheduler = setup_optimizer( @@ -191,13 +204,6 @@ def setup_megatron_model( optimizer = None scheduler = None - _update_model_config_funcs( - model, - cfg.model_config, - cfg.ddp_config, - optimizer, - align_grad_reduce=cfg.dist_config.align_grad_reduce, - ) print("Model, optimizer, and learning rate scheduler built") torch.distributed.barrier() @@ -443,6 +449,13 @@ def __init__( assert model_cfg.context_parallel_size == 1, ( "Context parallel is not supported right now" ) + model_cfg.expert_tensor_parallel_size = self.cfg["megatron_cfg"][ + "expert_tensor_parallel_size" + ] + model_cfg.expert_model_parallel_size = self.cfg["megatron_cfg"][ + "expert_model_parallel_size" + ] + model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] model_cfg.bf16 = self.dtype == torch.bfloat16 model_cfg.fp16 = self.dtype == torch.float16 if model_cfg.fp16: @@ -455,6 +468,22 @@ def __init__( model_cfg.params_dtype = torch.float32 model_cfg.pipeline_dtype = dtype_map[self.cfg["megatron_cfg"]["pipeline_dtype"]] model_cfg.parallel_output = True + # Setting moe_router_dtype to higher precision (e.g. fp64) can improve numerical stability, + # especially when using many experts. + model_cfg.moe_router_dtype = self.cfg["megatron_cfg"]["moe_router_dtype"] + + # The below two configs (and "freeze_moe_router") are used to stabilize moe training + # by preventing updates to the moe router. We found that this is helpful in reducing + # logprob error during training. + + # Set this to "none" to disable load balancing loss. 
+ model_cfg.moe_router_load_balancing_type = self.cfg["megatron_cfg"][ + "moe_router_load_balancing_type" + ] + # Set this to 0.0 to disable updates to the moe router expert bias + model_cfg.moe_router_bias_update_rate = self.cfg["megatron_cfg"][ + "moe_router_bias_update_rate" + ] if self.cfg["megatron_cfg"]["activation_checkpointing"]: model_cfg.activations_checkpoint_granularity = "full" model_cfg.activations_checkpoint_method = "uniform" @@ -612,6 +641,14 @@ def __init__( self.model = self.move_model(self.model, "cuda") + _update_model_config_funcs( + [self.model], + self.megatron_cfg.model_config, + self.megatron_cfg.ddp_config, + self.optimizer, + align_grad_reduce=self.megatron_cfg.dist_config.align_grad_reduce, + ) + from nemo.tron.tokenizers.tokenizer import build_tokenizer tokenizer_config = TokenizerConfig( @@ -632,6 +669,11 @@ def __init__( self._held_gather_buffer = None self.megatron_to_hf_converter = MegatronToHFConverter(hf_model_name, self.model) + # Create a map that maps any local parameter name to a list of global parameter names. + # This map is repeatedly used by parameter gatherring phase during refit of every step. 
+ self.local_key_to_global_keys = self.get_local_key_to_global_keys( + state_dict_info=self.prepare_weights_for_ipc()[0] + ) self.should_disable_forward_pre_hook = ( self.cfg["megatron_cfg"]["optimizer"]["use_distributed_optimizer"] and self.cfg["megatron_cfg"]["distributed_data_parallel_config"][ @@ -951,7 +993,9 @@ def collection_fn(output_tensor): token_logprobs = torch.cat( [torch.zeros_like(token_logprobs[:, :1]), token_logprobs], dim=1 ) - return torch.tensor(0.0), {"logprobs": token_logprobs} + return torch.tensor(0.0, device=token_logprobs.device), { + "logprobs": token_logprobs + } return output_tensor, collection_fn @@ -1210,6 +1254,60 @@ def report_device_id(self) -> str: # Get device UUID using NVML return get_device_uuid(device_idx) + @torch.no_grad() + def get_local_key_to_global_keys(self, state_dict_info: List[Tuple[Any, int]]): + """Get the local key to global keys mapping.""" + # Get parallel info + tp_group = parallel_state.get_tensor_model_parallel_group() + tp_world_size = torch.distributed.get_world_size(tp_group) + + pp_group = parallel_state.get_pipeline_model_parallel_group() + pp_world_size = torch.distributed.get_world_size(pp_group) + pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) + pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() + + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + + # start calculating the global key + ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") + state_dict = self.model.state_dict() + final_key_to_global_keys = {} + + for param_info, size in state_dict_info: + local_key, owner_pp_local_rank_id, _, _ = param_info + + # Step 1: create global key from local key + # if: for if a parameter is sharded along PP or EP; + # else: not sharded (like embedding) + pp_gathered_objs = [None] + if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: + pp_gathered_objs[0] = 
get_global_key_from_local_key( + local_key, self.model.config + ) + + # Step 2: gather global keys from ranks in PP group + src_global_rank = pp_global_ranks[owner_pp_local_rank_id] + torch.distributed.broadcast_object_list( + pp_gathered_objs, src=src_global_rank, group=pp_group + ) + + # Step 3: gather global keys from ranks in EP group + if ep_pattern.search(local_key): + ep_gathered_objs = [None] * ep_world_size + torch.distributed.all_gather_object( + ep_gathered_objs, pp_gathered_objs, group=ep_group + ) + flat_gathered_objs = [x for y in ep_gathered_objs for x in y] + else: + flat_gathered_objs = pp_gathered_objs + + final_key_to_global_keys[(local_key, owner_pp_local_rank_id)] = ( + flat_gathered_objs + ) + + return final_key_to_global_keys + def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: """Prepare Megatron model weights for IPC transfer to vLLM. @@ -1228,9 +1326,18 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: tp_world_size = torch.distributed.get_world_size(tp_group) tp_group_rank_ids = get_process_group_ranks(tp_group) + etp_group = parallel_state.get_expert_tensor_parallel_group() + etp_world_size = torch.distributed.get_world_size(etp_group) + etp_group_rank_ids = get_process_group_ranks(etp_group) + pp_group = parallel_state.get_pipeline_model_parallel_group() pp_world_size = torch.distributed.get_world_size(pp_group) pp_group_rank_ids = get_process_group_ranks(pp_group) + pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() + + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + ep_group_rank_ids = get_process_group_ranks(ep_group) # Collect parameter info param_info = [] @@ -1240,20 +1347,33 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: # Process each parameter in the model # state_dict includes parameters and persistent buffers + ep_pattern = 
re.compile(r"mlp\.experts.*\.weight\d*$") for name, param in self.model.state_dict().items(): # Skip _extra_state entries (these are metadata, not actual weights) if "_extra_state" in name: continue + use_etp = True if ep_pattern.search(name) else False + if use_etp: + tensor_mp_rank_ids = etp_group_rank_ids + else: + tensor_mp_rank_ids = tp_group_rank_ids + shape = list(param.shape) tp_dim = get_tp_dim(self.model, name, named_modules_dict) if tp_dim is not None: - tp_rank_ids = tuple(sorted(tp_group_rank_ids)) + tp_rank_ids = tuple(sorted(tensor_mp_rank_ids)) shape[tp_dim] *= len(tp_rank_ids) else: tp_rank_ids = (torch.distributed.get_rank(),) pp_rank_ids = tuple(sorted(pp_group_rank_ids)) + ep_rank_ids = tuple(sorted(ep_group_rank_ids)) + + if ep_pattern.search(name): + ep_rank_ids = tuple(sorted(ep_group_rank_ids)) + else: + ep_rank_ids = (torch.distributed.get_rank(),) # Calculate size for this parameter prec_to_bytes = { @@ -1265,14 +1385,15 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: size_in_bytes = ( param.element_size() * param.numel() - * len(tp_rank_ids) - * len(pp_rank_ids) + * len(tensor_mp_rank_ids) + * len(ep_rank_ids) * scale ) param_info.append( ( ( name, + pp_local_rank_id, tuple(shape), param.dtype, ), @@ -1290,7 +1411,16 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: ) pp_gathered_param_infos = [x for y in pp_gathered_param_infos for x in y] # type: ignore - all_param_infos = pp_gathered_param_infos + # Gather parameter info from all expert parallel ranks to ensure complete coverage + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + + # Gather all parameter info from all EP ranks + ep_gathered_param_infos = [None] * ep_world_size + torch.distributed.all_gather_object( + ep_gathered_param_infos, pp_gathered_param_infos, group=ep_group + ) + all_param_infos = [x for y in ep_gathered_param_infos for x in y] # Merge 
all parameter infos, keeping only unique parameter names merged_param_info = [] @@ -1312,8 +1442,9 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: device_idx = torch.cuda.current_device() ## Get device free memory using NVML total_available_bytes = get_free_memory_bytes(device_idx) - ## Use 80% of the free memory for safety - total_available_bytes *= 0.8 + # TODO: setting to low value (10%) since + # more buckets seems to have better perf + total_available_bytes *= 0.1 return param_info, total_available_bytes @@ -1326,10 +1457,16 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: Returns: Dict mapping device UUID to list of (mapped_key, handle) tuples """ + if self._held_gather_buffer is not None: + del self._held_gather_buffer + self._held_gather_buffer = None + gathered_megatron_params = gather_params( self.model, keys, + key_to_global_keys=self.local_key_to_global_keys, ) + gathered_hf_params = self.megatron_to_hf_converter.convert( gathered_megatron_params, self.model.config ) @@ -1493,19 +1630,20 @@ def move_model(self, model, device: str, move_params=True, move_grads=True): # move all param and grad buffers to the device if isinstance(model, DistributedDataParallel): # DDP case - for buffer_idx in range(len(model.buffers)): - if device == "cpu": - model.buffers[buffer_idx].offload_to_cpu( - move_params=move_params, move_grads=move_grads - ) - elif device == "cuda": - model.buffers[buffer_idx].reload_from_cpu( - move_params=move_params, move_grads=move_grads - ) - else: - raise ValueError( - f"Invalid device: {device}. Only strings 'cpu' and 'cuda' are supported." 
- ) + for buffers in [model.buffers, model.expert_parallel_buffers]: + for buffer_idx in range(len(buffers)): + if device == "cpu": + buffers[buffer_idx].offload_to_cpu( + move_params=move_params, move_grads=move_grads + ) + elif device == "cuda": + buffers[buffer_idx].reload_from_cpu( + move_params=move_params, move_grads=move_grads + ) + else: + raise ValueError( + f"Invalid device: {device}. Only strings 'cpu' and 'cuda' are supported." + ) elif isinstance(model, custom_FSDP): if device == "cpu": model.param_and_grad_buffer.offload_to_cpu(move_params, move_grads) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 9c3ddc3435..626fce2b6f 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -154,12 +154,18 @@ def get_basic_megatron_test_config( "activation_checkpointing": activation_checkpointing, "converter_type": "Qwen2ForCausalLM", # Use Qwen2 converter for Qwen3 models (compatible) "tensor_model_parallel_size": tp, + "expert_tensor_parallel_size": 1, + "expert_model_parallel_size": 1, "pipeline_model_parallel_size": pp, "num_layers_in_first_pipeline_stage": None, "num_layers_in_last_pipeline_stage": None, "context_parallel_size": 1, "pipeline_dtype": precision, "sequence_parallel": sequence_parallel, + "freeze_moe_router": True, + "moe_router_dtype": "fp64", + "moe_router_load_balancing_type": "none", + "moe_router_bias_update_rate": 0.0, "apply_rope_fusion": True, "optimizer": { "optimizer": "adam", diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index 7b56977258..a23c1b5559 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -73,12 +73,18 @@ def create_megatron_test_config( "activation_checkpointing": activation_checkpointing, "converter_type": converter_type, 
"tensor_model_parallel_size": tp, + "expert_tensor_parallel_size": 1, + "expert_model_parallel_size": 1, "pipeline_model_parallel_size": pp, "num_layers_in_first_pipeline_stage": None, "num_layers_in_last_pipeline_stage": None, "context_parallel_size": 1, "pipeline_dtype": precision, "sequence_parallel": sequence_parallel, + "freeze_moe_router": True, + "moe_router_dtype": "fp64", + "moe_router_load_balancing_type": "none", + "moe_router_bias_update_rate": 0.0, "apply_rope_fusion": True, "optimizer": { "optimizer": "adam", From e7d9253527682fa51842b8843479805be2ce7356 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 9 Jul 2025 10:03:14 -0700 Subject: [PATCH 21/59] chore: exclude ray.remote from coverage (#624) Signed-off-by: Terry Kong Signed-off-by: Jialei Chen --- .github/workflows/cicd-main.yml | 2 +- nemo_rl/distributed/virtual_cluster.py | 2 +- nemo_rl/environments/games/sliding_puzzle.py | 2 +- nemo_rl/environments/math_environment.py | 8 ++++---- nemo_rl/models/generation/vllm.py | 2 +- nemo_rl/models/policy/dtensor_policy_worker.py | 4 +++- nemo_rl/models/policy/megatron_policy_worker.py | 4 +++- nemo_rl/utils/venvs.py | 2 +- 8 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 3000bd5cfe..cbf45afd20 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -210,7 +210,7 @@ jobs: UNIT_TEST_SCRIPT: | cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then - uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-report=term --cov-report=json + uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-report=term-missing --cov-report=json else echo Skipping unit tests for docs-only level fi diff --git a/nemo_rl/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py index 22fe0bd670..ff9d135871 100644 --- a/nemo_rl/distributed/virtual_cluster.py +++ 
b/nemo_rl/distributed/virtual_cluster.py @@ -55,7 +55,7 @@ class PY_EXECUTABLES: MCORE = "uv run --reinstall --extra mcore" -@ray.remote +@ray.remote # pragma: no cover def _get_node_ip_and_free_port() -> tuple[str, int]: import socket diff --git a/nemo_rl/environments/games/sliding_puzzle.py b/nemo_rl/environments/games/sliding_puzzle.py index 7440992bf6..a2dd0fe241 100644 --- a/nemo_rl/environments/games/sliding_puzzle.py +++ b/nemo_rl/environments/games/sliding_puzzle.py @@ -338,7 +338,7 @@ def process_turn( ) -@ray.remote +@ray.remote # pragma: no cover class SlidingPuzzleEnv(EnvironmentInterface): """Sliding Puzzle environment (Ray Actor).""" diff --git a/nemo_rl/environments/math_environment.py b/nemo_rl/environments/math_environment.py index f23896c25d..068eb9f35c 100644 --- a/nemo_rl/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -52,7 +52,7 @@ def _mute_output(): yield -@ray.remote +@ray.remote # pragma: no cover class HFVerifyWorker: def __init__(self) -> None: logging.getLogger("math_verify").setLevel(logging.CRITICAL) @@ -100,7 +100,7 @@ def verify( return results -@ray.remote +@ray.remote # pragma: no cover class MultilingualMultichoiceVerifyWorker: def verify( self, pred_responses: list[str], ground_truths: list[str] @@ -133,7 +133,7 @@ def verify( return results -@ray.remote +@ray.remote # pragma: no cover class EnglishMultichoiceVerifyWorker: def verify( self, pred_responses: list[str], ground_truths: list[str] @@ -166,7 +166,7 @@ class MathEnvironmentMetadata(TypedDict): ground_truth: str -@ray.remote(max_restarts=-1, max_task_retries=-1) +@ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover class MathEnvironment(EnvironmentInterface): def __init__(self, cfg: MathEnvConfig): self.cfg = cfg diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 7a1be51c19..9125b67665 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -76,7 
+76,7 @@ class VllmConfig(GenerationConfig): @ray.remote( runtime_env={**get_nsight_config_if_pattern_matches("vllm_generation_worker")} -) +) # pragma: no cover class VllmGenerationWorker: def __repr__(self) -> str: """Customizes the actor's prefix in the Ray logs. diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 5249f88f90..2fd7032bdd 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -114,7 +114,9 @@ def get_cpu_state_dict( return new_state_dict -@ray.remote(runtime_env=get_runtime_env_for_policy_worker("dtensor_policy_worker")) +@ray.remote( + runtime_env=get_runtime_env_for_policy_worker("dtensor_policy_worker") +) # pragma: no cover class DTensorPolicyWorker: def __repr__(self) -> str: """Customizes the actor's prefix in the Ray logs. diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 778cb4b857..ae1918c842 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -327,7 +327,9 @@ def destroy_parallel_state(): pass -@ray.remote(runtime_env=get_runtime_env_for_policy_worker("megatron_policy_worker")) +@ray.remote( + runtime_env=get_runtime_env_for_policy_worker("megatron_policy_worker") +) # pragma: no cover class MegatronPolicyWorker: def __repr__(self): """Customizes the actor's prefix in the Ray logs. 
diff --git a/nemo_rl/utils/venvs.py b/nemo_rl/utils/venvs.py index aab4a6fba5..2d9c8018e2 100644 --- a/nemo_rl/utils/venvs.py +++ b/nemo_rl/utils/venvs.py @@ -104,7 +104,7 @@ def create_local_venv( # Ray-based helper to create a virtual environment on each Ray node -@ray.remote(num_cpus=1) +@ray.remote(num_cpus=1) # pragma: no cover def _env_builder( py_executable: str, venv_name: str, node_idx: int, force_rebuild: bool = False ): From ea7938f6dafcf1f31dfe2599b453c00cb392958c Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Wed, 9 Jul 2025 10:04:25 -0700 Subject: [PATCH 22/59] feat: guide to configure custom vllm version (#529) Signed-off-by: Terry Kong Signed-off-by: Terry Kong Signed-off-by: Jialei Chen --- .gitignore | 1 + docs/guides/use-custom-vllm.md | 67 +++++++++++++++++++++++++++++++ docs/index.md | 1 + tools/build-custom-vllm.sh | 73 ++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+) create mode 100644 docs/guides/use-custom-vllm.md create mode 100644 tools/build-custom-vllm.sh diff --git a/.gitignore b/.gitignore index e7d0cfcaff..954db7041d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ dist/ *.vscode/ release_run* ckpts/ +3rdparty/vllm # Test coverage.json diff --git a/docs/guides/use-custom-vllm.md b/docs/guides/use-custom-vllm.md new file mode 100644 index 0000000000..ff196e0e53 --- /dev/null +++ b/docs/guides/use-custom-vllm.md @@ -0,0 +1,67 @@ +# Experiment with Custom vLLM + +This guide explains how to use your own version of vLLM while leveraging a pre-compiled vLLM wheel, so you don't have to recompile the C++ source code. + +## Clone and Build Your Custom vLLM + +Clone your vLLM fork and build it using the provided script. 
For example: + +```sh +# Usage: bash tools/build-custom-vllm.sh +bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm a3319f4f04fbea7defe883e516df727711e516cd +``` +## Update `pyproject.toml` to Use Your Local vLLM +Edit your [pyproject.toml](https://github.com/NVIDIA-NeMo/RL/blob/main/pyproject.toml) so that the `vLLM` dependency points to your local clone instead of PyPI. + +**Change the pyproject.toml:** +```toml +# Add setuptools_scm +[project] +# ... +dependencies = [ +# ... + "setuptools_scm", # <-- Add +# ... +] + +# Change the vLLM dependency: + +[project.optional-dependencies] +vllm = [ + #"vllm==0.9.0", # <-- BEFORE + "vllm", # <-- AFTER +] + +# ... + +# Add a local source entry: +[tool.uv.sources] +# ... +vllm = { path = "3rdparty/vllm", editable = true } # <-- ADD AN ENTRY + +# ... + +# Update build isolation packages: +[tool.uv] +no-build-isolation-package = ["transformer-engine-torch", "transformer-engine"] # <-- BEFORE +no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "vllm"] # <-- AFTER +``` +## Re-Lock and Install Dependencies +Install any missing build dependencies and re-lock your environment: + +```sh +uv pip install setuptools_scm # vLLM doesn't declare this build dependency so we install it manually +uv lock +``` +## Verify Your Custom vLLM +Test your setup to ensure your custom vLLM is being used: +```sh +uv run --extra vllm python -c 'import vllm; print("Successfully imported vLLM")' +# Uninstalled 1 package in 1ms +# Installed 1 package in 2ms +# Hi! If you see this, you're using a custom version of vLLM for the purposes of this tutorial +# INFO 06-18 09:22:44 [__init__.py:244] Automatically detected platform cuda. +# Successfully imported vLLM +``` + +If you don't see the log message `Hi! If you see this...`, it's because this message is unique to the tutorial's specific `vLLM` fork. 
It was added in [this commit](https://github.com/terrykong/vllm/commit/69d5add744e51b988e985736f35c162d3e87b683) and doesn't exist in the main `vLLM` project. diff --git a/docs/index.md b/docs/index.md index 33d507b6f4..81eb5e778e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -47,6 +47,7 @@ testing.md documentation.md debugging.md nsys-profiling.md +guides/use-custom-vllm.md apidocs/index.rst ``` diff --git a/tools/build-custom-vllm.sh b/tools/build-custom-vllm.sh new file mode 100644 index 0000000000..0ae3ec0c58 --- /dev/null +++ b/tools/build-custom-vllm.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eoux pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Default values +DEFAULT_GIT_URL="https://github.com/terrykong/vllm.git" +DEFAULT_BRANCH="terryk/demo-custom-vllm" +DEFAULT_VLLM_COMMIT=a3319f4f04fbea7defe883e516df727711e516cd # use full commit hash from the main branch + +# Parse command line arguments +GIT_URL=${1:-$DEFAULT_GIT_URL} +BRANCH=${2:-$DEFAULT_BRANCH} +export VLLM_COMMIT=${3:-$DEFAULT_VLLM_COMMIT} +export VLLM_PRECOMPILED_WHEEL_LOCATION="https://wheels.vllm.ai/${DEFAULT_VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" + +BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/vllm") +if [[ -e "$BUILD_DIR" ]]; then + echo "[ERROR] $BUILD_DIR already exists. Please remove or move it before running this script." 
+ exit 1 +fi + +echo "Building vLLM from:" +echo " Vllm Git URL: $GIT_URL" +echo " Vllm Branch: $BRANCH" +echo " VLLM Wheel Commit: $VLLM_COMMIT" +echo " VLLM Precompiled Wheel Location: $VLLM_PRECOMPILED_WHEEL_LOCATION" + +# Clone the repository +echo "Cloning repository..." +git clone "$GIT_URL" "$BUILD_DIR" +cd "$BUILD_DIR" +git checkout "$BRANCH" + +# Create a new Python environment using uv +echo "Creating Python environment..." +uv venv + +# Remove all comments from requirements files to prevent use_existing_torch.py from incorrectly removing xformers +echo "Removing comments from requirements files..." +find requirements/ -name "*.txt" -type f -exec sed -i 's/#.*$//' {} \; 2>/dev/null || true +find requirements/ -name "*.txt" -type f -exec sed -i '/^[[:space:]]*$/d' {} \; 2>/dev/null || true + +uv run --no-project use_existing_torch.py + +# Install dependencies +echo "Installing dependencies..." +uv pip install --upgrade pip +uv pip install numpy setuptools setuptools_scm +uv pip install torch==2.7.0 --torch-backend=cu128 + +# Install vLLM using precompiled wheel +echo "Installing vLLM with precompiled wheel..." +uv pip install --no-build-isolation -e . + +echo "Build completed successfully!" +echo "The built vLLM is available in: $BUILD_DIR" +echo "You can now update your pyproject.toml to use this local version." +echo "Follow instructions on https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md for how to configure your local NeMo RL environment to use this custom vLLM." 
From 9f5f833fc40e737f58497e60f50b8efe7411155f Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Wed, 9 Jul 2025 16:06:04 -0700 Subject: [PATCH 23/59] feat: Deepseek Support (#591) Signed-off-by: Yi-Fu Wu Signed-off-by: Anna Shors Co-authored-by: Guyue Huang Co-authored-by: Anna Shors Signed-off-by: Jialei Chen --- docs/guides/deepseek.md | 28 ++++ docs/index.md | 1 + nemo_rl/models/megatron/community_import.py | 20 ++- nemo_rl/models/megatron/converters/common.py | 16 ++- .../models/megatron/converters/deepseek.py | 121 ++++++++++++++++++ 5 files changed, 177 insertions(+), 9 deletions(-) create mode 100644 docs/guides/deepseek.md create mode 100644 nemo_rl/models/megatron/converters/deepseek.py diff --git a/docs/guides/deepseek.md b/docs/guides/deepseek.md new file mode 100644 index 0000000000..1b419be54f --- /dev/null +++ b/docs/guides/deepseek.md @@ -0,0 +1,28 @@ +# DeepSeek-V3 + +## Create BF16 Hugging Face checkpoint + +(adapted from https://docs.nvidia.com/nemo-framework/user-guide/latest/llms/deepseek_v3.html) + +```bash +# clone DeepSeek V3 weights from HF (This can take hours) +git lfs install +git clone https://huggingface.co/deepseek-ai/DeepSeek-V3 DeepSeek-V3-FP8 + +# clone DeepSeek-V3 code +git clone https://github.com/deepseek-ai/DeepSeek-V3.git + +# transformers (since v4.23.0) (checks for tensor format in the metadata)[https://github.com/huggingface/transformers/blob/9ae22fe3c1b81f99a764d382054b6ebe2b025bd4/src/transformers/modeling_utils.py#L388] +cd DeepSeek-V3/inference +sed -i '88{s/new_safetensor_file/new_safetensor_file, metadata={"format": "pt"}/}' fp8_cast_bf16.py + +# convert weights +python fp8_cast_bf16.py --input-fp8-hf-path ../../DeepSeek-V3-FP8 --output-bf16-hf-path ../../DeepSeek-V3-BF16 + +# copy other files +cd ../.. 
+cp DeepSeek-V3-FP8/{tokenizer_config.json,tokenizer.json,modeling_deepseek.py,configuration_deepseek.py} DeepSeek-V3-BF16/ + +# copy config.json, remove `quantization_config`, and set num_nextn_predict_layers to 0 (we currently do not support mtp): +jq 'del(.quantization_config) | .num_nextn_predict_layers=0' DeepSeek-V3-FP8/config.json > DeepSeek-V3-BF16/config.json +``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 81eb5e778e..f9252656e5 100644 --- a/docs/index.md +++ b/docs/index.md @@ -29,6 +29,7 @@ guides/dpo.md guides/grpo.md guides/grpo-deepscaler.md guides/eval.md +guides/deepseek.md model-quirks.md ``` diff --git a/nemo_rl/models/megatron/community_import.py b/nemo_rl/models/megatron/community_import.py index 5ad061c54a..d5ef60c684 100644 --- a/nemo_rl/models/megatron/community_import.py +++ b/nemo_rl/models/megatron/community_import.py @@ -14,9 +14,12 @@ import os +from transformers import AutoConfig + def import_model_from_hf_name(hf_model_name: str, output_path: str): - if "llama" in hf_model_name.lower(): + hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) + if hf_config.model_type == "llama": from nemo.tron.converter.llama import HFLlamaImporter print(f"Importing model {hf_model_name} to {output_path}...") @@ -24,7 +27,7 @@ def import_model_from_hf_name(hf_model_name: str, output_path: str): hf_model_name, output_path=output_path, ) - elif "qwen" in hf_model_name.lower(): + elif hf_config.model_type == "qwen2": from nemo.tron.converter.qwen import HFQwen2Importer print(f"Importing model {hf_model_name} to {output_path}...") @@ -32,11 +35,16 @@ def import_model_from_hf_name(hf_model_name: str, output_path: str): hf_model_name, output_path=output_path, ) - else: - raise ValueError( - f"Unknown model: {hf_model_name}. Currently, only Qwen2 and Llama are supported. " - "If you'd like to run with a different model, please raise an issue or consider adding your own converter." 
+ elif hf_config.model_type in ("deepseek_v2", "deepseek_v3"): + from nemo.tron.converter.deepseek import HFDeepSeekImporter + + print(f"Importing model {hf_model_name} to {output_path}...") + importer = HFDeepSeekImporter( + hf_model_name, + output_path=output_path, ) + else: + raise ValueError(f"Unknown model_type: {hf_config.model_type}") importer.apply() # resetting mcore state import megatron.core.rerun_state_machine diff --git a/nemo_rl/models/megatron/converters/common.py b/nemo_rl/models/megatron/converters/common.py index d8999fc014..80be5e5755 100644 --- a/nemo_rl/models/megatron/converters/common.py +++ b/nemo_rl/models/megatron/converters/common.py @@ -29,9 +29,12 @@ from transformers import AutoConfig, AutoModelForCausalLM from transformers.integrations.accelerate import init_empty_weights +import nemo_rl.models.megatron.converters.deepseek as deepseek_converter import nemo_rl.models.megatron.converters.llama as llama_converter import nemo_rl.models.megatron.converters.qwen2 as qwen2_converter +_GROUP_TO_RANKS_CACHE = {} + def get_local_layer_num(s): """Assumes layer number is preceeded by 'layers.'.""" @@ -271,21 +274,28 @@ def __init__(self, hf_model_name, megatron_model): global_keys = ep_gathered_global_keys global_keys_map = {k: None for k in global_keys} - if "qwen" in hf_model_name.lower(): + if config.model_type == "qwen2": self.export_mapping = qwen2_converter.get_export_mapping(megatron_model) self.export_transforms = qwen2_converter.get_export_transforms(config) self.get_source_fn = lambda source_state_dict, _: _ModelState( source_state_dict ) - elif "llama" in hf_model_name.lower(): + elif config.model_type == "llama": self.export_mapping = llama_converter.get_export_mapping() self.export_transforms = llama_converter.get_export_transforms(config) self.get_source_fn = lambda source_state_dict, _: _ModelState( source_state_dict ) + elif config.model_type in ("deepseek_v2", "deepseek_v3"): + self.export_mapping = 
deepseek_converter.get_export_mapping( + source=global_keys_map, + source_config=megatron_model.config.__dict__, + ) + self.export_transforms = deepseek_converter.get_export_transforms() + self.get_source_fn = deepseek_converter.get_source_fn else: raise ValueError( - f"No converter mapping and transforms found for {hf_model_name}" + f"No converter mapping and transforms found for {hf_model_name} with model_type {config.model_type}" ) self.export_transforms = update_transforms_for_nemorl(self.export_transforms) diff --git a/nemo_rl/models/megatron/converters/deepseek.py b/nemo_rl/models/megatron/converters/deepseek.py new file mode 100644 index 0000000000..e900cca95d --- /dev/null +++ b/nemo_rl/models/megatron/converters/deepseek.py @@ -0,0 +1,121 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, Dict + +from nemo.lightning import io +from nemo.lightning.io.state import TransformFns, _ModelState + + +def get_export_mapping(source, source_config): + mapping = { + # Embed + "embedding.word_embeddings.weight": "model.embed_tokens.weight", + # Attention + "decoder.layers.*.input_layernorm.weight": "model.layers.*.input_layernorm.weight", + "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", + "decoder.layers.*.self_attention.linear_q_down_proj.weight": "model.layers.*.self_attn.q_a_proj.weight", + "decoder.layers.*.self_attention.linear_q_up_proj.weight": "model.layers.*.self_attn.q_b_proj.weight", + "decoder.layers.*.self_attention.linear_kv_down_proj.weight": "model.layers.*.self_attn.kv_a_proj_with_mqa.weight", + "decoder.layers.*.self_attention.linear_kv_up_proj.weight": "model.layers.*.self_attn.kv_b_proj.weight", + "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight": "model.layers.*.self_attn.q_a_layernorm.weight", + "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight": "model.layers.*.self_attn.kv_a_layernorm.weight", + "decoder.layers.*.pre_mlp_layernorm.weight": "model.layers.*.post_attention_layernorm.weight", + # Dense MLP + "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", + # MoE + "decoder.layers.*.mlp.router.weight": "model.layers.*.mlp.gate.weight", + "decoder.layers.*.mlp.experts.linear_fc2.weight*": "model.layers.*.mlp.experts.*.down_proj.weight", + "decoder.layers.*.mlp.shared_experts.linear_fc2.weight": "model.layers.*.mlp.shared_experts.down_proj.weight", + # LM Head + "decoder.final_layernorm.weight": "model.norm.weight", + "output_layer.weight": "lm_head.weight", + } + # For lite model + if source_config["q_lora_rank"] is None: + del mapping["decoder.layers.*.self_attention.linear_q_down_proj.weight"] + del mapping["decoder.layers.*.self_attention.linear_q_up_proj.weight"] + 
mapping["decoder.layers.*.self_attention.linear_q_proj.weight"] = ( + "model.layers.*.self_attn.q_proj.weight" + ) + # Account for Mcore local spec + if ( + source_config["q_lora_rank"] is not None + and "decoder.layers.0.self_attention.q_layernorm.weight" in source + ): + mapping["decoder.layers.*.self_attention.q_layernorm.weight"] = mapping.pop( + "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight" + ) + + if "decoder.layers.0.self_attention.kv_layernorm.weight" in source: + mapping["decoder.layers.*.self_attention.kv_layernorm.weight"] = mapping.pop( + "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight" + ) + + if source_config.get("moe_router_enable_expert_bias", False): + mapping.update( + { + "decoder.layers.*.mlp.router.expert_bias": "model.layers.*.mlp.gate.e_score_correction_bias", + } + ) + return mapping + + +def get_export_transforms(): + transforms = [ + io.state_transform( + source_key="decoder.layers.*.mlp.linear_fc1.weight", + target_key=( + "model.layers.*.mlp.gate_proj.weight", + "model.layers.*.mlp.up_proj.weight", + ), + fn=TransformFns.split_fc1, + ), + io.state_transform( + source_key="decoder.layers.*.mlp.experts.linear_fc1.weight*", + target_key=( + "model.layers.*.mlp.experts.*.gate_proj.weight", + "model.layers.*.mlp.experts.*.up_proj.weight", + ), + fn=TransformFns.split_fc1, + ), + io.state_transform( + source_key="decoder.layers.*.mlp.shared_experts.linear_fc1.weight", + target_key=( + "model.layers.*.mlp.shared_experts.gate_proj.weight", + "model.layers.*.mlp.shared_experts.up_proj.weight", + ), + fn=TransformFns.split_fc1, + ), + ] + return transforms + + +def get_source_fn(source: Dict[str, Any], source_config: Dict[str, Any]) -> _ModelState: + """Modify source state_dict before conversion. 
+ + In deepseek, HF weight `model.layers.*.post_attention_layernorm.weight` is mapped to mcore weight + a) `decoder.layers.*.mlp.linear_fc1.layer_norm_weight`, if the layer is dense + b) `decoder.layers.*.pre_mlp_layernorm.weight`, if the layer is MoE + + We rename decoder.layers.*.mlp.linear_fc1.layer_norm_weight in the first case to unify key names + """ + for layer_i in range(source_config["num_layers"]): + if f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" in source: + weight = source.pop( + f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" + ) + source[f"decoder.layers.{layer_i}.pre_mlp_layernorm.weight"] = weight + modified_source = _ModelState(source) + return modified_source From 387856a860a015eca7c1496bef68ec7ff185dcd2 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Wed, 9 Jul 2025 16:30:48 -0700 Subject: [PATCH 24/59] feat: decouple checkpointing from validation (#575) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- examples/configs/sft.yaml | 2 +- nemo_rl/algorithms/dpo.py | 33 +++++++++++----------- nemo_rl/algorithms/grpo.py | 33 ++++++++++++---------- nemo_rl/algorithms/sft.py | 36 ++++++++++++----------- nemo_rl/utils/checkpoint.py | 47 +++++++++++++++++++++---------- tests/unit/algorithms/test_sft.py | 8 +++--- 6 files changed, 91 insertions(+), 68 deletions(-) diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 01c9c2c452..3839d455e2 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -15,7 +15,7 @@ sft: checkpointing: enabled: true checkpoint_dir: "results/sft" - metric_name: "val_loss" + metric_name: "val_loss" ## set to null to save most recent k checkpoints higher_is_better: false keep_top_k: 3 save_period: 10 diff --git a/nemo_rl/algorithms/dpo.py b/nemo_rl/algorithms/dpo.py index 3883328216..259b23d665 100644 --- a/nemo_rl/algorithms/dpo.py +++ b/nemo_rl/algorithms/dpo.py @@ -134,18 +134,6 @@ def setup( dpo_save_state: Optional[DPOSaveState] = 
checkpointer.load_training_info( last_checkpoint_path ) - # config validation checks - if master_config["checkpointing"]["enabled"]: - assert master_config["checkpointing"]["save_period"] > 0 - assert ( - master_config["checkpointing"]["save_period"] - % master_config["dpo"]["val_period"] - == 0 - ), ( - f"Checkpointing save period {master_config['checkpointing']['save_period']} " - f"must be a multiple of validation period {master_config['dpo']['val_period']}" - f", or we won't know what metric to save!" - ) # ========================== # Data @@ -436,9 +424,7 @@ def dpo_train( ) # Run validation if it's a validation step - if is_last_step or ( - val_period > 0 and (total_steps + 1) % val_period == 0 - ): + if val_period > 0 and (total_steps + 1) % val_period == 0: val_metrics, validation_timings = validate( policy, val_dataloader, @@ -469,7 +455,22 @@ def dpo_train( dpo_save_state["step"] = (current_step + 1) % len(train_dataloader) dpo_save_state["total_steps"] = total_steps + 1 dpo_save_state["epoch"] = current_epoch - dpo_save_state["val_loss"] = val_metrics["loss"] + if val_metrics is not None: + dpo_save_state["val_loss"] = val_metrics["loss"] + elif "val_loss" in dpo_save_state: + del dpo_save_state["val_loss"] + + if master_config["checkpointing"]["metric_name"] is not None: + if ( + master_config["checkpointing"]["metric_name"] + not in dpo_save_state + ): + warnings.warn( + f"You asked to save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " + "Saving most recent k checkpoints instead." 
+ ) + master_config["checkpointing"]["metric_name"] = None + with timer.time("checkpointing"): print(f"Saving checkpoint for step {total_steps + 1}...") checkpoint_path = checkpointer.init_tmp_checkpoint( diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 29e3ba2773..b07cf10cae 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os +import warnings from pathlib import Path from typing import Any, Optional, Tuple, TypedDict, TypeVar, cast @@ -170,19 +171,6 @@ def setup( if grpo_save_state is None: grpo_save_state = _default_grpo_save_state() - # config validation checks - if master_config["checkpointing"]["enabled"]: - assert master_config["checkpointing"]["save_period"] > 0 - assert ( - master_config["checkpointing"]["save_period"] - % master_config["grpo"]["val_period"] - == 0 - ), ( - f"Checkpointing save period {master_config['checkpointing']['save_period']} " - f"must be a multiple of validation period {master_config['grpo']['val_period']}" - f", or we won't know what metric to save!" 
- ) - # ========================== # Data # ========================== @@ -683,7 +671,7 @@ def grpo_train( ) # Run validation if it's a validation step - if is_last_step or (val_period > 0 and (step + 1) % val_period == 0): + if val_period > 0 and (step + 1) % val_period == 0: if NEED_REFIT and POLICY_GENERATION_STALE: refit_policy_generation( policy, policy_generation, colocated_inference @@ -714,8 +702,23 @@ def grpo_train( policy.prepare_for_training() grpo_save_state["step"] = step + 1 - grpo_save_state["val_reward"] = val_metrics["accuracy"] + if val_metrics is not None: + grpo_save_state["val_reward"] = val_metrics["accuracy"] + elif "val_reward" in grpo_save_state: + del grpo_save_state["val_reward"] grpo_save_state["consumed_samples"] = consumed_samples + + if master_config["checkpointing"]["metric_name"] is not None: + if ( + master_config["checkpointing"]["metric_name"] + not in grpo_save_state + ): + warnings.warn( + f"You asked to save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " + "Saving most recent k checkpoints instead." 
+ ) + master_config["checkpointing"]["metric_name"] = None + with timer.time("checkpointing"): print(f"Saving checkpoint for step {step + 1}...") checkpoint_path = checkpointer.init_tmp_checkpoint( diff --git a/nemo_rl/algorithms/sft.py b/nemo_rl/algorithms/sft.py index f988e7788b..ee227e0aa6 100644 --- a/nemo_rl/algorithms/sft.py +++ b/nemo_rl/algorithms/sft.py @@ -127,18 +127,6 @@ def setup( sft_save_state: Optional[SFTSaveState] = checkpointer.load_training_info( last_checkpoint_path ) - # config validation checks - if master_config["checkpointing"]["enabled"]: - assert master_config["checkpointing"]["save_period"] > 0 - assert ( - master_config["checkpointing"]["save_period"] - % master_config["sft"]["val_period"] - == 0 - ), ( - f"Checkpointing save period {master_config['checkpointing']['save_period']} " - f"must be a multiple of validation period {master_config['sft']['val_period']}" - f", or we won't know what metric to save!" - ) # ========================== # Data @@ -427,9 +415,7 @@ def sft_train( ) # Run validation if it's a validation step - if is_last_step or ( - val_period > 0 and (total_steps + 1) % val_period == 0 - ): + if val_period > 0 and (total_steps + 1) % val_period == 0: val_metrics, validation_timings = validate( policy, val_dataloader, @@ -457,11 +443,27 @@ def sft_train( is_last_step or (total_steps + 1) % master_config["checkpointing"]["save_period"] == 0 - ): # +1 because step is 0-indexed + ): + ## +1 because step is 0-indexed sft_save_state["step"] = (current_step + 1) % len(train_dataloader) sft_save_state["total_steps"] = total_steps + 1 sft_save_state["epoch"] = current_epoch - sft_save_state["val_loss"] = val_metrics["val_loss"] + if val_metrics is not None: + sft_save_state["val_loss"] = val_metrics["val_loss"] + elif "val_loss" in sft_save_state: + del sft_save_state["val_loss"] + + if master_config["checkpointing"]["metric_name"] is not None: + if ( + master_config["checkpointing"]["metric_name"] + not in sft_save_state + ): + 
warnings.warn( + f"You asked to save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " + "Saving most recent k checkpoints instead." + ) + master_config["checkpointing"]["metric_name"] = None + with timer.time("checkpointing"): print(f"Saving checkpoint for step {total_steps + 1}...") checkpoint_path = checkpointer.init_tmp_checkpoint( diff --git a/nemo_rl/utils/checkpoint.py b/nemo_rl/utils/checkpoint.py index 6255dcc79f..1be5948ba3 100644 --- a/nemo_rl/utils/checkpoint.py +++ b/nemo_rl/utils/checkpoint.py @@ -21,6 +21,7 @@ import json import os import shutil +import warnings from pathlib import Path from typing import Any, Optional, TypedDict, Union @@ -155,12 +156,14 @@ def finalize_checkpoint(self, checkpoint_path: PathLike) -> None: self.remove_old_checkpoints() def remove_old_checkpoints(self, exclude_latest: bool = True) -> None: - """Remove checkpoints that are not in the top-k or latest based on the metric. + """Remove checkpoints that are not in the top-k or latest based on the (optional) metric. If keep_top_k is set, this method removes all checkpoints except the top-k - best ones based on the specified metric. The best checkpoints are determined - by the metric value and the higher_is_better setting. When multiple checkpoints - have the same metric value, more recent checkpoints (higher step numbers) are preferred. + best ones. The "best" checkpoints are determined by: + - If a metric is provided: the given metric value and the higher_is_better setting. + When multiple checkpoints have the same metric value, more recent checkpoints + (higher step numbers) are preferred. + - If no metric is provided: the step number. The most recent k checkpoints are kept. Args: exclude_latest (bool): Whether to exclude the latest checkpoint from deletion. 
(may result in K+1 checkpoints) @@ -173,22 +176,36 @@ def remove_old_checkpoints(self, exclude_latest: bool = True) -> None: if checkpoint_history else None ) - # sort by metric value first, then by step number (for equal metrics, prefer more recent) - if self.higher_is_better: - # For higher_is_better=True: higher metric values first, then higher step numbers - checkpoint_history.sort( - key=lambda x: (x[2][self.metric_name], x[0]), reverse=True - ) + + if self.metric_name is None: + checkpoint_history.sort(key=lambda x: x[0], reverse=True) else: - # For higher_is_better=False: lower metric values first, then higher step numbers for equal values - checkpoint_history.sort(key=lambda x: (x[2][self.metric_name], -x[0])) + try: + # sort by metric value first, then by step number (for equal metrics, prefer more recent) + if self.higher_is_better: + # For higher_is_better=True: higher metric values first, then higher step numbers + checkpoint_history.sort( + key=lambda x: (x[2][self.metric_name], x[0]), reverse=True + ) + else: + # For higher_is_better=False: lower metric values first, then higher step numbers for equal values + checkpoint_history.sort( + key=lambda x: (x[2][self.metric_name], -x[0]) + ) + except KeyError: + warnings.warn( + f"Metric {self.metric_name} not found in checkpoint history. Keeping most recent k checkpoints." 
+ ) + checkpoint_history.sort(key=lambda x: x[0], reverse=True) + + self.metric_name = None # remove checkpoints that are not in the top-k for checkpoint in checkpoint_history[self.keep_top_k :]: if exclude_latest and checkpoint[0] == latest_step: continue print( - f"Removing checkpoint {checkpoint[1]} due to being outside top-{self.keep_top_k}, metric: {checkpoint[2][self.metric_name]}" + f"Removing checkpoint {checkpoint[1]} due to being outside top-{self.keep_top_k}" ) shutil.rmtree(checkpoint[1]) @@ -206,8 +223,8 @@ def get_best_checkpoint_path(self) -> Optional[str]: return None # sort by metric value if self.metric_name not in checkpoint_history[0][2]: - print( - f"WARNING:Metric {self.metric_name} not found in checkpoint history. Returning last" + warnings.warn( + f"Metric {self.metric_name} not found in checkpoint history. Returning last" ) return self.get_latest_checkpoint_path() diff --git a/tests/unit/algorithms/test_sft.py b/tests/unit/algorithms/test_sft.py index cf09b73d9f..4b6d9ee2ce 100644 --- a/tests/unit/algorithms/test_sft.py +++ b/tests/unit/algorithms/test_sft.py @@ -115,8 +115,8 @@ def test_exit_on_max_steps(mock_components): sft_save_state, ) - # Verify we only trained for 12 + 1 steps. The extra 1 step is the final validation step. - assert mock_components["policy"].train.call_count == 12 + 1 + # Verify we only trained for 12 steps. + assert mock_components["policy"].train.call_count == 12 def test_exit_on_max_epochs(mock_components): @@ -141,5 +141,5 @@ def test_exit_on_max_epochs(mock_components): sft_save_state, ) - # Verify we trained for exactly two epochs (20 batches) + 1 validation step - assert mock_components["policy"].train.call_count == 20 + 1 + # Verify we trained for exactly two epochs (20 batches). 
+ assert mock_components["policy"].train.call_count == 20 From b5b34249acb62d9861a928bf84f5e68ee5b2bb4c Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Thu, 10 Jul 2025 13:36:05 -0700 Subject: [PATCH 25/59] feat: dynamically detect --gres=gpu:8 arg to work on clusters that don't need it (#642) Signed-off-by: Terry Kong Signed-off-by: Jialei Chen --- ray.sub | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/ray.sub b/ray.sub index 8c0cba32b1..4635f66be6 100644 --- a/ray.sub +++ b/ray.sub @@ -6,7 +6,6 @@ #SBATCH --partition=PARTITION #SBATCH --time=1:0:0 #SBATCH --dependency=singleton -#SBATCH --gres=gpu:8 # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. # @@ -25,6 +24,26 @@ set -eoux pipefail +######################################################## +# Function to detect if SLURM cluster uses GRES +######################################################## +maybe_gres_arg() { + # Check if any nodes in the partition have GRES configured + # Assumes a homogeneous allocation (not a heterogeneous job) + if sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep -q "gpu:"; then + # Do a quick assert here that gpus:8 == gpus:$GPUS_PER_NODE. It is probably a user error if someone isn't using GPUS_PER_NODE=8 on our clusters if it supports --gres=gpu:8. + if [[ $GPUS_PER_NODE -ne $(sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep "gpu:" | cut -d: -f2) ]]; then + echo "Error: GPUS_PER_NODE=$GPUS_PER_NODE but GRES detected is $(sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep "gpu:") meaning GPUS_PER_NODE is not set to fully claim the GPUs on the nodes." 
>&2 + exit 1 + fi + echo "--gres=gpu:${GPUS_PER_NODE}" + return + fi + + # No GRES support detected + echo "" +} + ######################################################## # User defined variables ######################################################## @@ -74,7 +93,18 @@ BASE_LOG_DIR=${BASE_LOG_DIR:-$SLURM_SUBMIT_DIR} LOG_DIR="$BASE_LOG_DIR/$SLURM_JOB_ID-logs" mkdir -p $LOG_DIR -COMMON_SRUN_ARGS="" +# Number of GPUs per worker node +GPUS_PER_NODE=${GPUS_PER_NODE:-8} + +# Detect GRES support and set GRES_ARG +GRES_ARG=$(maybe_gres_arg) +if [[ -n "$GRES_ARG" ]]; then + echo "[INFO] GRES support detected. Using: $GRES_ARG" +else + echo "[INFO] No GRES support detected. Running without --gres flag." +fi + +COMMON_SRUN_ARGS="$GRES_ARG" COMMON_SRUN_ARGS+=" --no-container-mount-home" COMMON_SRUN_ARGS+=" --mpi=pmix" COMMON_SRUN_ARGS+=" --container-mounts=$MOUNTS" @@ -83,10 +113,6 @@ COMMON_SRUN_ARGS+=" --container-workdir=$SLURM_SUBMIT_DIR" # TODO: delete these (just for debugging) COMMON_SRUN_ARGS+=" -p $SLURM_JOB_PARTITION" COMMON_SRUN_ARGS+=" -A $SLURM_JOB_ACCOUNT" -COMMON_SRUN_ARGS+=" --gres=gpu:8" - -# Number of GPUs per worker node -GPUS_PER_NODE=${GPUS_PER_NODE:-8} # Number of CPUs per worker node CPUS_PER_WORKER=${CPUS_PER_WORKER:-$((GPUS_PER_NODE * 16))} @@ -323,7 +349,7 @@ else WORKER_NUM=\${1:-} if [[ -z "\$WORKER_NUM" ]]; then # Empty means we are on the head node - srun --no-container-mount-home --gres=gpu:8 -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" --jobid $SLURM_JOB_ID --pty bash + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" --jobid $SLURM_JOB_ID --pty bash else # Worker numbers 1 through N-1 correspond to ray-worker-1 through ray-worker-(N-1) # and use nodes_array[1] through nodes_array[N-1] @@ 
-332,7 +358,7 @@ else exit 1 fi nodes_array=($nodes) - srun --no-container-mount-home --gres=gpu:8 -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-worker-\$WORKER_NUM --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "\${nodes_array[\$WORKER_NUM]}" --jobid $SLURM_JOB_ID --pty bash + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-worker-\$WORKER_NUM --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "\${nodes_array[\$WORKER_NUM]}" --jobid $SLURM_JOB_ID --pty bash fi EOF chmod +x $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh From ebf2c05465f0ed3079fcaa2014b138c273dc6ad0 Mon Sep 17 00:00:00 2001 From: Zhanda Zhu <49645678+Dazz993@users.noreply.github.com> Date: Thu, 10 Jul 2025 19:51:21 -0400 Subject: [PATCH 26/59] fix: fix nccl P2P initialization error for non-colocated (#636) Signed-off-by: Zhanda Signed-off-by: Zhanda Zhu <49645678+Dazz993@users.noreply.github.com> Co-authored-by: Zhanda Zhu Signed-off-by: Jialei Chen --- nemo_rl/models/generation/vllm.py | 6 +++--- nemo_rl/models/generation/vllm_backend.py | 5 +++++ nemo_rl/models/policy/dtensor_policy_worker.py | 11 +++-------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 9125b67665..0c1e4ee40e 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -1329,11 +1329,11 @@ def __init__( ) # It's necessary to set env_vars here to ensure that vllm non-leader workers also have these env_vars - # Disable NCCL SHM if training and generation are not co-located: https://github.com/NVIDIA-NeMo/RL/issues/564 + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. + # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. 
env_vars = {} if not self.cfg["colocated"]["enabled"]: - env_vars["NCCL_SHM_DISABLE"] = "1" - env_vars["NCCL_P2P_DISABLE"] = "1" + os.environ["NCCL_CUMEM_ENABLE"] = "1" # Check if we need parallelism-aware worker group creation if self.model_parallel_size > 1: diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index 8aaa34cd15..01dc68146b 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os from typing import Any import torch @@ -36,6 +37,10 @@ def init_collective( local_rank = torch.distributed.get_rank() rank = rank_prefix + local_rank + 1 # 1 is the head node of the train cluster + # Temporary fix for vllm==0.9.0 which overrides the NCCL_CUMEM_ENABLE to 0 and causes + # https://github.com/NVIDIA-NeMo/RL/issues/564. This can be removed after it is upgraded to vllm>=0.9.1rc1. + os.environ["NCCL_CUMEM_ENABLE"] = "1" + pg = StatelessProcessGroup.create( host=ip, port=port, rank=rank, world_size=world_size ) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 2fd7032bdd..6e619885ae 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -138,14 +138,14 @@ def __init__( init_reference_model: bool = True, **kwargs: Any, ): - # Disable NCCL SHM if training and generation are not co-located: https://github.com/NVIDIA-NeMo/RL/issues/564 + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. + # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. 
if ( "generation" in config and config["generation"] is not None and not config["generation"]["colocated"]["enabled"] ): - os.environ["NCCL_SHM_DISABLE"] = "1" - os.environ["NCCL_P2P_DISABLE"] = "1" + os.environ["NCCL_CUMEM_ENABLE"] = "1" self.cfg = config # torch distributed init. Envars for rank, world_size, and master_addr and master_port are set from the ray remote call @@ -388,11 +388,6 @@ def init_collective(self, ip: str, port: int, world_size: int) -> None: from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator from vllm.distributed.utils import StatelessProcessGroup - # keep the same behavior as vllm - # see https://github.com/vllm-project/vllm/blob/v0.9.0/vllm/env_override.py#L25 - if not os.path.exists("/dev/nvidia-caps-imex-channels"): - os.environ["NCCL_CUMEM_ENABLE"] = "0" - if self.rank == 0: pg = StatelessProcessGroup.create( host=ip, port=port, rank=0, world_size=world_size From 3a370616be5538e131eb56c2c167a19814784af6 Mon Sep 17 00:00:00 2001 From: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Date: Fri, 11 Jul 2025 09:17:00 -0700 Subject: [PATCH 27/59] fix: Mcore: Added functional grpo test and typing fixes (#527) Signed-off-by: Sahil Jain Signed-off-by: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Signed-off-by: Jialei Chen --- .github/workflows/cicd-main.yml | 1 + docs/design-docs/logger.md | 2 +- docs/design-docs/loss-functions.md | 4 +- docs/guides/grpo.md | 4 +- examples/configs/grpo_math_1B_megatron.yaml | 1 - examples/configs/grpo_math_70B_megatron.yaml | 1 - examples/configs/grpo_math_8B_megatron.yaml | 1 - nemo_rl/algorithms/grpo.py | 6 +-- nemo_rl/distributed/batched_data_dict.py | 3 +- nemo_rl/models/generation/vllm.py | 15 +++---- .../models/policy/dtensor_policy_worker.py | 10 ++--- tests/functional/grpo_megatron.sh | 44 +++++++++++++++++++ 12 files changed, 64 insertions(+), 28 deletions(-) create mode 100755 tests/functional/grpo_megatron.sh diff --git 
a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index cbf45afd20..a646b0d662 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -226,6 +226,7 @@ jobs: if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L1|L2)$ ]]; then time uv run --no-sync bash ./tests/functional/sft.sh time uv run --no-sync bash ./tests/functional/grpo.sh + time uv run --no-sync bash ./tests/functional/grpo_megatron.sh time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh time uv run --no-sync bash ./tests/functional/dpo.sh diff --git a/docs/design-docs/logger.md b/docs/design-docs/logger.md index d15ad5c1ba..b13436423b 100644 --- a/docs/design-docs/logger.md +++ b/docs/design-docs/logger.md @@ -21,7 +21,7 @@ class LoggerInterface(ABC): """Abstract base class for logger backends.""" @abstractmethod - def log_metrics(self, metrics: Dict[str, Any], step: int, prefix: Optional[str]: "") -> None: + def log_metrics(self, metrics: dict[str, Any], step: int, prefix: Optional[str]: "") -> None: """Log a dictionary of metrics.""" pass diff --git a/docs/design-docs/loss-functions.md b/docs/design-docs/loss-functions.md index de48102821..b0fb9523e2 100644 --- a/docs/design-docs/loss-functions.md +++ b/docs/design-docs/loss-functions.md @@ -22,8 +22,6 @@ In NeMo RL, this information is passed to the loss function directly. 
Each loss For our simple example above, this would look like: ```{testcode} -from typing import Tuple - import torch from nemo_rl.algorithms.interfaces import LossFunction from nemo_rl.algorithms.loss_functions import LossType @@ -44,7 +42,7 @@ class SimpleAverageLoss(LossFunction): next_token_losses: torch.Tensor, data: BatchedDataDict, total_valid_tokens_or_seqs: torch.Tensor, - ) -> Tuple[torch.Tensor, dict]: + ) -> tuple[torch.Tensor, dict]: """Compute the simple average loss with proper microbatch handling.""" token_mask = data["token_mask"] ## token mask for this microbatch sample_mask = data["sample_mask"] ## sample mask for this microbatch diff --git a/docs/guides/grpo.md b/docs/guides/grpo.md index f577820a21..88d349a9a7 100644 --- a/docs/guides/grpo.md +++ b/docs/guides/grpo.md @@ -44,7 +44,7 @@ We define a [DatumSpec](../../nemo_rl/data/interfaces.py) that holds all relevan class DatumSpec(TypedDict): message_log: LLMMessageLogType length: int # total (concatenated) length of the message tensors - extra_env_info: Dict[str, Any] # anything your environment requires goes here, for example the 'answer' of a math problem + extra_env_info: dict[str, Any] # anything your environment requires goes here, for example the 'answer' of a math problem loss_multiplier: float # multiplier for the loss for this datum. 0 to mask out (say the sample is invalid) idx: int task_name: Optional[str] = "default" @@ -59,7 +59,7 @@ For each task, you should provide a data processor that reads from your dataset ```python def my_data_processor( - datum_dict: Dict[str, Any], # loaded directly from your dataset (i.e. single line of jsonl data) + datum_dict: dict[str, Any], # loaded directly from your dataset (i.e. 
single line of jsonl data) task_data_spec: TaskDataSpec, tokenizer, max_seq_length: int, diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index e6dbc8f18e..7a8a651a54 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -40,7 +40,6 @@ policy: logprob_batch_size: 8 max_total_sequence_length: 512 precision: "bfloat16" - refit_buffer_size_gb: 4 # used for refitting inference engine, the unit is GB dtensor_cfg: enabled: false diff --git a/examples/configs/grpo_math_70B_megatron.yaml b/examples/configs/grpo_math_70B_megatron.yaml index 15a65c5ce6..a7ba2c8a52 100644 --- a/examples/configs/grpo_math_70B_megatron.yaml +++ b/examples/configs/grpo_math_70B_megatron.yaml @@ -17,7 +17,6 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false - refit_buffer_size_gb: 4 # used for refitting inference engine, the unit is GB dtensor_cfg: enabled: false diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index e3e6247f45..41ce4412fc 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -21,7 +21,6 @@ policy: precision: "bfloat16" fsdp_offload_enabled: false activation_checkpointing_enabled: false - refit_buffer_size_gb: 4 # used for refitting inference engine, the unit is GB dtensor_cfg: enabled: false diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index b07cf10cae..4bbe0cf52b 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -14,7 +14,7 @@ import os import warnings from pathlib import Path -from typing import Any, Optional, Tuple, TypedDict, TypeVar, cast +from typing import Any, Optional, TypedDict, TypeVar, cast import numpy as np import ray @@ -128,7 +128,7 @@ def setup( ) -> tuple[ ColocatablePolicyInterface, Optional[GenerationInterface], - Tuple[RayVirtualCluster, 
RayVirtualCluster], + tuple[RayVirtualCluster, RayVirtualCluster], StatefulDataLoader, Optional[StatefulDataLoader], ClippedPGLossFn, @@ -140,7 +140,7 @@ def setup( """Main entry point for running GRPO algorithm. Returns: - Tuple of policy, cluster, dataloader, tokenizer, loss_fn, math_env, logger, master_config, val_dataloader + tuple of policy, cluster, dataloader, tokenizer, loss_fn, math_env, logger, master_config, val_dataloader """ # Extract individual configs for easier access policy_config = master_config["policy"] diff --git a/nemo_rl/distributed/batched_data_dict.py b/nemo_rl/distributed/batched_data_dict.py index 6609fb2e24..dc30d68364 100644 --- a/nemo_rl/distributed/batched_data_dict.py +++ b/nemo_rl/distributed/batched_data_dict.py @@ -17,7 +17,6 @@ Any, Generic, Iterator, - List, Mapping, Optional, Type, @@ -171,7 +170,7 @@ def chunk(self, rank: int, chunks: int) -> "SlicedDataDict": return chunked_batch - def reorder_data(self, reorded_indices: List[int]): + def reorder_data(self, reorded_indices: list[int]): """Reorders the data along the batch dimension by the given indices.""" batch_sizes = set() for val in self.data.values(): diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index 0c1e4ee40e..bb689ac7f4 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -22,11 +22,8 @@ from typing import ( Any, AsyncGenerator, - Dict, - List, NotRequired, Optional, - Tuple, TypedDict, Union, cast, @@ -1370,7 +1367,7 @@ def __init__( def _get_tied_worker_bundle_indices( self, cluster: RayVirtualCluster - ) -> List[Tuple[int, List[int]]]: + ) -> list[tuple[int, list[int]]]: """Calculate bundle indices for tensor and pipeline parallel workers. 
Handles both unified placement groups (for cross-node model parallelism) and @@ -1393,7 +1390,7 @@ def _get_tied_worker_bundle_indices( def get_node_bundles( pg: PlacementGroup, - ) -> Dict[str, List[int]]: + ) -> dict[str, list[int]]: # Retrieve mapping from node ID to bundle indices from a placement group. try: pg_table = ray.util.placement_group_table(pg) @@ -1403,7 +1400,7 @@ def get_node_bundles( "Failed to retrieve bundle/node mapping from placement group" ) from e - node_bundles: Dict[str, List[int]] = defaultdict(list) + node_bundles: dict[str, list[int]] = defaultdict(list) for bundle_idx, node_id in bundle_to_node.items(): node_bundles[node_id].append(bundle_idx) for bundles in node_bundles.values(): @@ -1412,7 +1409,7 @@ def get_node_bundles( def allocate_worker_groups( pg: PlacementGroup, tp_size: int, pp_size: int - ) -> List[Tuple[int, List[int]]]: + ) -> list[tuple[int, list[int]]]: # Allocate worker groups for TP and PP training, assuming all nodes have identical bundle counts. 
# Retrieve both bundle mapping and per-node bundles @@ -1442,12 +1439,12 @@ def allocate_worker_groups( node_idx = {nid: idx for idx, nid in enumerate(sorted_nodes)} # Flatten bundles in node order - flat: List[int] = [] + flat: list[int] = [] for nid in sorted_nodes: flat.extend(node_bundles[nid]) # Slice into groups and assign logical index - groups: List[Tuple[int, List[int]]] = [] + groups: list[tuple[int, list[int]]] = [] for i in range(num_groups): slice_ = flat[ i * model_parallel_size : (i + 1) * model_parallel_size diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 6e619885ae..81d93f6954 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -17,7 +17,7 @@ import os from collections import defaultdict from contextlib import AbstractContextManager, contextmanager, nullcontext -from typing import Any, Generator, Iterable, List, Optional, Set, Union, cast +from typing import Any, Generator, Iterable, Optional, Set, Union, cast import ray import torch @@ -336,8 +336,8 @@ def __init__( @staticmethod def create_context_parallel_ctx( cp_mesh: torch.distributed.device_mesh.DeviceMesh, - cp_buffers: List[torch.Tensor], - cp_seq_dims: List[int], + cp_buffers: list[torch.Tensor], + cp_seq_dims: list[int], cp_no_restore_buffers: Set[torch.Tensor], cp_rotate_method: Optional[str] = None, ): @@ -345,8 +345,8 @@ def create_context_parallel_ctx( Args: cp_mesh (DeviceMesh): The device mesh for context parallel. - cp_buffers (List[torch.Tensor]): The buffers for context parallel. - cp_seq_dims (List[int]): The sequence dimensions for context parallel. + cp_buffers (list[torch.Tensor]): The buffers for context parallel. + cp_seq_dims (list[int]): The sequence dimensions for context parallel. cp_no_restore_buffers (Set[torch.Tensor]): The no restore buffers for context parallel. 
cp_rotate_method (str): The rotation method for context parallel, such as "allgather" or "addtoall". """ diff --git a/tests/functional/grpo_megatron.sh b/tests/functional/grpo_megatron.sh new file mode 100755 index 0000000000..15fb2efec3 --- /dev/null +++ b/tests/functional/grpo_megatron.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) +# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +# Using Qwen2.5-0.5B instead of Qwen3-0.6B because the latter is not supported by Megatron yet +cd $PROJECT_ROOT +uv run $PROJECT_ROOT/examples/run_grpo_math.py \ + --config $PROJECT_ROOT/examples/configs/grpo_math_1B_megatron.yaml \ + policy.model_name=Qwen/Qwen2.5-0.5B \ + grpo.num_prompts_per_step=2 \ + grpo.num_generations_per_prompt=4 \ + policy.train_global_batch_size=4 \ + policy.logprob_batch_size=4 \ + policy.train_micro_batch_size=1 \ + cluster.gpus_per_node=2 \ + grpo.max_num_steps=2 \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=false \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'max(data["train/token_mult_prob_error"]) < 1.05' \ + From 89aa84c829c52bb0b4ad1f8863e040525a1e7a29 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Fri, 11 Jul 2025 16:13:21 -0700 Subject: [PATCH 28/59] feat: plumb environment variables to RayWorkerGroup (#631) Signed-off-by: ashors1 Signed-off-by: Parth Chadha Signed-off-by: 
Anna Shors Co-authored-by: Parth Chadha Signed-off-by: Jialei Chen --- nemo_rl/distributed/worker_groups.py | 8 +- .../models/policy/dtensor_policy_worker.py | 4 + nemo_rl/models/policy/lm_policy.py | 5 + .../models/policy/megatron_policy_worker.py | 16 +-- nemo_rl/models/policy/utils.py | 60 ++++++-- tests/unit/algorithms/__init__.py | 0 tests/unit/distributed/test_worker_groups.py | 86 +++++++++++ tests/unit/models/__init__.py | 0 tests/unit/models/policy/test_utils.py | 133 ++++++++++++++++++ 9 files changed, 288 insertions(+), 24 deletions(-) create mode 100644 tests/unit/algorithms/__init__.py create mode 100644 tests/unit/models/__init__.py create mode 100644 tests/unit/models/policy/test_utils.py diff --git a/nemo_rl/distributed/worker_groups.py b/nemo_rl/distributed/worker_groups.py index b008452f1c..b625be25dc 100644 --- a/nemo_rl/distributed/worker_groups.py +++ b/nemo_rl/distributed/worker_groups.py @@ -392,7 +392,9 @@ def __init__( # Create workers based on the bundle_indices_list self._create_workers_from_bundle_indices( - remote_worker_builder, bundle_indices_list, env_vars=env_vars + remote_worker_builder, + bundle_indices_list, + env_vars=env_vars, ) def get_dp_leader_worker_idx(self, dp_shard_idx: int) -> int: @@ -424,7 +426,9 @@ def _create_workers_from_bundle_indices( ) # Update env_vars with the current environment variables - env_vars.update(dict(os.environ)) + for k, v in os.environ.items(): + if k not in env_vars: + env_vars[k] = v # Get the python environment for the actor actor_python_env = get_actor_python_env( diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 81d93f6954..df8b4e734f 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -55,6 +55,7 @@ ReferenceLogprobOutputSpec, ) from nemo_rl.models.policy.utils import ( + configure_expandable_segments, get_gpu_info, get_runtime_env_for_policy_worker, 
import_class_from_path, @@ -147,6 +148,9 @@ def __init__( ): os.environ["NCCL_CUMEM_ENABLE"] = "1" + # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) + configure_expandable_segments() + self.cfg = config # torch distributed init. Envars for rank, world_size, and master_addr and master_port are set from the ray remote call torch.distributed.init_process_group(backend="nccl") diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index 22bfcd690b..5e82b61d72 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -75,6 +75,8 @@ def __init__( tp_size = config["megatron_cfg"]["tensor_model_parallel_size"] pp_size = config["megatron_cfg"]["pipeline_model_parallel_size"] cp_size = config["megatron_cfg"]["context_parallel_size"] + + env_vars = config["megatron_cfg"].get("env_vars", {}) else: assert config["dtensor_cfg"]["enabled"], ( "Please either set policy.megatron_cfg.enabled=true to use Megatron training backend " @@ -86,6 +88,8 @@ def __init__( tp_size = config["dtensor_cfg"]["tensor_parallel_size"] cp_size = config["dtensor_cfg"]["context_parallel_size"] + env_vars = config["dtensor_cfg"].get("env_vars", {}) + self.sharding_annotations = NamedSharding( layout=np.arange(cluster.world_size()).reshape( pp_size, # PP @@ -120,6 +124,7 @@ def __init__( name_prefix=name_prefix, workers_per_node=workers_per_node, sharding_annotations=self.sharding_annotations, + env_vars=env_vars, ) if config["dynamic_batching"]["enabled"]: diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index ae1918c842..7f35e55af6 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -113,7 +113,11 @@ LogprobOutputSpec, ReferenceLogprobOutputSpec, ) -from nemo_rl.models.policy.utils import get_gpu_info, get_runtime_env_for_policy_worker +from nemo_rl.models.policy.utils 
import ( + configure_expandable_segments, + get_gpu_info, + get_runtime_env_for_policy_worker, +) TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) @@ -363,6 +367,9 @@ def __init__( } self.dtype = dtype_map[self.cfg["precision"]] + # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) + configure_expandable_segments() + # cfg["model_name"] is allowed to be either an HF model name or a path to an HF checkpoint # check if hf_model_name is a path hf_model_name = self.cfg["model_name"] @@ -683,13 +690,6 @@ def __init__( ] ) - def configure_worker(self, num_gpus: int, bundle_indices: Optional[tuple] = None): - USE_EXPANDABLE_SEGMENTS = False # Disabling this right now as it seems to cause vLLM refit issues with Ampere - if USE_EXPANDABLE_SEGMENTS: - return None, {"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"}, None - else: - return None, None, None - def is_alive(self): return True diff --git a/nemo_rl/models/policy/utils.py b/nemo_rl/models/policy/utils.py index 485dea9011..7e5e1b92fe 100644 --- a/nemo_rl/models/policy/utils.py +++ b/nemo_rl/models/policy/utils.py @@ -131,25 +131,57 @@ def sliding_window_overwrite(model_name: str) -> dict[str, Any]: return overwrite_dict +def configure_expandable_segments() -> None: + """Configure expandable_segments on Hopper and newer architectures (compute capability 9.x+). + + This helps with memory allocation but causes crashes on Ampere GPUs, so we only enable it + on newer architectures. If PYTORCH_CUDA_ALLOC_CONF is already set, preserves existing values. 
+ """ + compute_capability = torch.cuda.get_device_properties(0).major + + if compute_capability >= 9: # Hopper+ + existing_conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "") + + # Check if expandable_segments is already configured + if "expandable_segments" in existing_conf: + print(f"expandable_segments already configured: {existing_conf}") + # Already configured, don't override + return + + # Add expandable_segments to existing configuration + if existing_conf: + # Append to existing configuration + new_conf = f"{existing_conf},expandable_segments:True" + else: + # Set new configuration + new_conf = "expandable_segments:True" + + print(f"Setting PYTORCH_CUDA_ALLOC_CONF to {new_conf}") + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = new_conf + + else: + ## make sure that expandable_segments is not set to True + if "expandable_segments" in os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""): + conf_items = os.environ["PYTORCH_CUDA_ALLOC_CONF"].split(",") + for item in conf_items: + if item.strip().startswith("expandable_segments"): + key_value = item.split(":") + if len(key_value) == 2 and key_value[1].strip().lower() == "true": + raise RuntimeError( + "expandable_segments is enabled in PYTORCH_CUDA_ALLOC_CONF, " + "but this is not supported on architectures older than Hopper (compute capability < 9). " + "Please set expandable_segments to False." + ) + + def get_runtime_env_for_policy_worker(policy_worker_name: str) -> dict[str, Any]: - """Get runtime environment configuration for DTensorPolicyWorker. + """Get runtime environment configuration for policy workers. - Conditionally enables expandable_segments on Hopper GPUs only, - as it causes crashes on Ampere GPUs. + Note: expandable_segments configuration is handled directly in the worker init methods + to ensure proper GPU detection after CUDA initialization. 
""" runtime_env = { **get_nsight_config_if_pattern_matches(policy_worker_name), } - # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) - try: - compute_capability = torch.cuda.get_device_properties(0).major - if compute_capability >= 9: # Hopper+ - runtime_env["env_vars"] = { - "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True" - } - except Exception: - # If we can't detect GPU capability, don't enable expandable_segments for safety - pass - return runtime_env diff --git a/tests/unit/algorithms/__init__.py b/tests/unit/algorithms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/distributed/test_worker_groups.py b/tests/unit/distributed/test_worker_groups.py index 12131fe4a4..57976a94d8 100644 --- a/tests/unit/distributed/test_worker_groups.py +++ b/tests/unit/distributed/test_worker_groups.py @@ -301,6 +301,92 @@ def test_environment_variables_setup(register_test_actor, virtual_cluster): worker_group.shutdown(force=True) +def test_custom_environment_variables(register_test_actor, virtual_cluster): + """Test that custom environment variables passed through env_vars are correctly set in workers.""" + actor_fqn = register_test_actor + builder = RayWorkerBuilder(actor_fqn) + + # Define custom environment variables to pass to workers + custom_env_vars = { + "CUSTOM_VAR_1": "test_value_1", + "CUSTOM_VAR_2": "test_value_2", + "NEMO_TEST_ENV": "nemo_test_value", + "DUMMY_TEST_VAR": "/custom/test/path", + } + + # Create worker group with custom environment variables + worker_group = RayWorkerGroup( + cluster=virtual_cluster, + remote_worker_builder=builder, + workers_per_node=2, + env_vars=custom_env_vars.copy(), + ) + + assert len(worker_group.workers) == 2 + + # Check that all workers have the custom environment variables set + for i, worker in enumerate(worker_group.workers): + # Check each custom environment variable + for var_name, expected_value in custom_env_vars.items(): + 
actual_value = ray.get(worker.get_env_var.remote(var_name)) + assert actual_value == expected_value, ( + f"Worker {i}: Expected {var_name}={expected_value}, got {actual_value}" + ) + + # Also verify that the standard distributed environment variables are still set + rank, ws, node_rank, local_rank = ray.get( + worker.get_rank_world_size_node_rank_local_rank.remote() + ) + assert rank == str(i) + assert ws == "2" + assert node_rank == "0" + assert local_rank == str(i) + + worker_group.shutdown(force=True) + + +def test_custom_environment_variables_override_existing( + register_test_actor, virtual_cluster +): + """Test that custom environment variables can override existing environment variables.""" + actor_fqn = register_test_actor + builder = RayWorkerBuilder(actor_fqn) + + # Set an environment variable in the current process + os.environ["DUMMY_PYTHONPATH"] = "/original/python/path" + + # Define custom environment variables that override existing ones + custom_env_vars = { + "DUMMY_PYTHONPATH": "/overridden/python/path", + "CUSTOM_OVERRIDE": "overridden_value", + } + + # Create worker group with custom environment variables + worker_group = RayWorkerGroup( + cluster=virtual_cluster, + remote_worker_builder=builder, + workers_per_node=1, + env_vars=custom_env_vars, + ) + + assert len(worker_group.workers) == 1 + worker = worker_group.workers[0] + + # Check that the custom environment variable overrides the original + pythonpath_value = ray.get(worker.get_env_var.remote("DUMMY_PYTHONPATH")) + assert pythonpath_value == "/overridden/python/path", ( + f"Expected DUMMY_PYTHONPATH to be overridden, got {pythonpath_value}" + ) + + # Check that the new custom variable is set + custom_value = ray.get(worker.get_env_var.remote("CUSTOM_OVERRIDE")) + assert custom_value == "overridden_value", ( + f"Expected CUSTOM_OVERRIDE=overridden_value, got {custom_value}" + ) + + worker_group.shutdown(force=True) + + def test_configure_worker_interaction(register_test_actor, 
virtual_cluster): actor_fqn = register_test_actor builder = RayWorkerBuilder(actor_fqn) diff --git a/tests/unit/models/__init__.py b/tests/unit/models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/models/policy/test_utils.py b/tests/unit/models/policy/test_utils.py new file mode 100644 index 0000000000..98faa01667 --- /dev/null +++ b/tests/unit/models/policy/test_utils.py @@ -0,0 +1,133 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import unittest +from unittest.mock import MagicMock, patch + +from nemo_rl.models.policy.utils import configure_expandable_segments + + +class TestConfigureExpandableSegments(unittest.TestCase): + """Test cases for configure_expandable_segments function.""" + + def setUp(self): + """Set up test environment.""" + # Store original environment variable + self.original_pytorch_cuda_alloc_conf = os.environ.get( + "PYTORCH_CUDA_ALLOC_CONF" + ) + + def tearDown(self): + """Clean up after tests.""" + # Restore original environment variable + if self.original_pytorch_cuda_alloc_conf is not None: + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = ( + self.original_pytorch_cuda_alloc_conf + ) + elif "PYTORCH_CUDA_ALLOC_CONF" in os.environ: + del os.environ["PYTORCH_CUDA_ALLOC_CONF"] + + @patch("torch.cuda.get_device_properties") + def test_hopper_gpu_no_existing_config(self, mock_get_device_properties): + """Test Hopper+ GPU (compute capability >= 9) with no existing PYTORCH_CUDA_ALLOC_CONF.""" + # Mock GPU properties for Hopper+ architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 9 + mock_get_device_properties.return_value = mock_device_properties + + # Ensure no existing config + if "PYTORCH_CUDA_ALLOC_CONF" in os.environ: + del os.environ["PYTORCH_CUDA_ALLOC_CONF"] + + # Call the function + configure_expandable_segments() + + # Verify the environment variable was set correctly + self.assertEqual( + os.environ["PYTORCH_CUDA_ALLOC_CONF"], "expandable_segments:True" + ) + + @patch("torch.cuda.get_device_properties") + def test_hopper_gpu_with_existing_config(self, mock_get_device_properties): + """Test Hopper+ GPU with existing PYTORCH_CUDA_ALLOC_CONF.""" + # Mock GPU properties for Hopper+ architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 9 + mock_get_device_properties.return_value = mock_device_properties + + # Set existing config + existing_config = "max_split_size_mb:128" + 
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config + + # Call the function + configure_expandable_segments() + + # Verify the environment variable was updated correctly + expected_config = f"{existing_config},expandable_segments:True" + self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], expected_config) + + @patch("torch.cuda.get_device_properties") + def test_hopper_gpu_already_configured(self, mock_get_device_properties): + """Test Hopper+ GPU with existing config that already has expandable_segments.""" + # Mock GPU properties for Hopper+ architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 9 + mock_get_device_properties.return_value = mock_device_properties + + # Set existing config with expandable_segments already present + existing_config = "max_split_size_mb:128,expandable_segments:False" + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config + + # Call the function + configure_expandable_segments() + + # Verify the environment variable was not changed + self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], existing_config) + + @patch("torch.cuda.get_device_properties") + def test_ampere_gpu_no_config_change(self, mock_get_device_properties): + """Test Ampere GPU (compute capability < 9) should not modify config.""" + # Mock GPU properties for Ampere architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 8 # Ampere + mock_get_device_properties.return_value = mock_device_properties + + # Set existing config + existing_config = "max_split_size_mb:128" + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config + + # Call the function + configure_expandable_segments() + + # Verify the environment variable was not changed + self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], existing_config) + + @patch("torch.cuda.get_device_properties") + def test_ampere_gpu_no_existing_config(self, mock_get_device_properties): + """Test Ampere GPU with no existing config should not set anything.""" 
+ # Mock GPU properties for Ampere architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 8 # Ampere + mock_get_device_properties.return_value = mock_device_properties + + # Ensure no existing config + if "PYTORCH_CUDA_ALLOC_CONF" in os.environ: + del os.environ["PYTORCH_CUDA_ALLOC_CONF"] + + # Call the function + configure_expandable_segments() + + # Verify the environment variable was not set + self.assertNotIn("PYTORCH_CUDA_ALLOC_CONF", os.environ) From 6d385a2af0e6f1dca999792cde5ce86dbe6f398a Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Mon, 14 Jul 2025 09:16:56 -0700 Subject: [PATCH 29/59] feat: Qwen3 support (#592) Signed-off-by: Yi-Fu Wu Signed-off-by: Anna Shors Signed-off-by: ashors1 Co-authored-by: Yi-Fu Wu Co-authored-by: Guyue Huang Signed-off-by: Jialei Chen --- .github/workflows/cicd-main.yml | 3 +- .gitmodules | 2 +- 3rdparty/NeMo-workspace/NeMo | 2 +- .../grpo_math_qwen30ba3b_megatron.yaml | 76 +++++ nemo_rl/models/megatron/community_import.py | 19 +- nemo_rl/models/megatron/converters/common.py | 7 + nemo_rl/models/megatron/converters/qwen3.py | 89 ++++++ .../models/policy/megatron_policy_worker.py | 30 +- pyproject.toml | 3 + .../converters/test_qwen_conversion.py | 284 ++++++++++++++++++ 10 files changed, 496 insertions(+), 19 deletions(-) create mode 100644 examples/configs/grpo_math_qwen30ba3b_megatron.yaml create mode 100644 nemo_rl/models/megatron/converters/qwen3.py create mode 100644 tests/unit/models/megatron/converters/test_qwen_conversion.py diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index a646b0d662..06a2022bbe 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -210,7 +210,8 @@ jobs: UNIT_TEST_SCRIPT: | cd /opt/nemo-rl if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then - uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-report=term-missing --cov-report=json + uv run --no-sync bash -x 
./tests/run_unit.sh --cov=nemo_rl --cov-report=term-missing --cov-report=json -m \"not mcore\" + uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-report=term-missing --cov-report=json -m mcore else echo Skipping unit tests for docs-only level fi diff --git a/.gitmodules b/.gitmodules index 9421f926c4..2a588f3a89 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "3rdparty/NeMo"] path = 3rdparty/NeMo-workspace/NeMo url = https://github.com/NVIDIA/NeMo.git - branch = terryk/hemil/automodel-custom-loop-with-sahil-patch-yifu-patch + branch = ashors/nemorl-qwen3 shallow = true [submodule "3rdparty/Megatron-LM"] path = 3rdparty/Megatron-LM-workspace/Megatron-LM diff --git a/3rdparty/NeMo-workspace/NeMo b/3rdparty/NeMo-workspace/NeMo index 0e0894300e..33259f2540 160000 --- a/3rdparty/NeMo-workspace/NeMo +++ b/3rdparty/NeMo-workspace/NeMo @@ -1 +1 @@ -Subproject commit 0e0894300e09aca042bc07859f660f22858f0a9f +Subproject commit 33259f2540af6eef375d43fc48bdcbd7ec490c29 diff --git a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml new file mode 100644 index 0000000000..915babbf5c --- /dev/null +++ b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml @@ -0,0 +1,76 @@ +# GRPO Algorithm Configuration +defaults: "grpo_math_1B_megatron.yaml" + +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + +policy: + model_name: "Qwen/Qwen3-30B-A3B" + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + train_global_batch_size: 512 + train_micro_batch_size: 1 + generation_batch_size: 32 # Only used when generating using HF backend + logprob_batch_size: 4 + max_total_sequence_length: 4096 + precision: "bfloat16" + fsdp_offload_enabled: false + activation_checkpointing_enabled: false + + dtensor_cfg: + enabled: false + + optimizer: null # remove default FSDP optimizer + + scheduler: null # remove default FSDP 
scheduler + + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + converter_type: "LlamaForCausalLM" + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 4 + context_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 4 + sequence_parallel: True + pipeline_dtype: ${policy.precision} + + optimizer: + optimizer: "adam" + lr: 3.0e-7 + min_lr: 3.0e-8 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: null + lr_warmup_iters: 50 + lr_warmup_init: 3.0e-8 + + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" + + generation: + backend: "vllm" + max_new_tokens: ${policy.max_total_sequence_length} + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + tensor_parallel_size: 4 + gpu_memory_utilization: 0.8 + max_model_len: ${policy.max_total_sequence_length} + +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/nemo_rl/models/megatron/community_import.py b/nemo_rl/models/megatron/community_import.py index d5ef60c684..fdaf2f3690 100644 --- a/nemo_rl/models/megatron/community_import.py +++ b/nemo_rl/models/megatron/community_import.py @@ -35,6 +35,14 @@ def import_model_from_hf_name(hf_model_name: str, output_path: str): hf_model_name, output_path=output_path, ) + elif hf_config.model_type in ("qwen3", "qwen3_moe"): + from nemo.tron.converter.qwen import HFQwen3Importer + + print(f"Importing model {hf_model_name} to {output_path}...") + importer = HFQwen3Importer( + hf_model_name, + output_path=output_path, + ) elif hf_config.model_type in ("deepseek_v2", "deepseek_v3"): from nemo.tron.converter.deepseek import HFDeepSeekImporter @@ -44,7 +52,10 @@ def import_model_from_hf_name(hf_model_name: str, output_path: 
str): output_path=output_path, ) else: - raise ValueError(f"Unknown model_type: {hf_config.model_type}") + raise ValueError( + f"Unknown model type: {hf_config.model_type}. Currently, DeepSeek, Qwen and Llama are supported. " + "If you'd like to run with a different model, please raise an issue or consider adding your own converter." + ) importer.apply() # resetting mcore state import megatron.core.rerun_state_machine @@ -64,11 +75,13 @@ def export_model_from_megatron( f"HF checkpoint already exists at {output_path}. Delete it to run or set overwrite=True." ) - if "llama" in hf_model_name.lower(): + hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) + + if hf_config.model_type == "llama": from nemo.tron.converter.llama import HFLlamaExporter exporter_cls = HFLlamaExporter - elif "qwen" in hf_model_name.lower(): + elif hf_config.model_type == "qwen2": from nemo.tron.converter.qwen import HFQwen2Exporter exporter_cls = HFQwen2Exporter diff --git a/nemo_rl/models/megatron/converters/common.py b/nemo_rl/models/megatron/converters/common.py index 80be5e5755..42bdf4b44a 100644 --- a/nemo_rl/models/megatron/converters/common.py +++ b/nemo_rl/models/megatron/converters/common.py @@ -32,6 +32,7 @@ import nemo_rl.models.megatron.converters.deepseek as deepseek_converter import nemo_rl.models.megatron.converters.llama as llama_converter import nemo_rl.models.megatron.converters.qwen2 as qwen2_converter +import nemo_rl.models.megatron.converters.qwen3 as qwen3_converter _GROUP_TO_RANKS_CACHE = {} @@ -280,6 +281,12 @@ def __init__(self, hf_model_name, megatron_model): self.get_source_fn = lambda source_state_dict, _: _ModelState( source_state_dict ) + elif config.model_type in ("qwen3", "qwen3_moe"): + self.export_mapping = qwen3_converter.get_export_mapping(config) + self.export_transforms = qwen3_converter.get_export_transforms(config) + self.get_source_fn = lambda source_state_dict, _: _ModelState( + source_state_dict + ) elif config.model_type == 
"llama": self.export_mapping = llama_converter.get_export_mapping() self.export_transforms = llama_converter.get_export_transforms(config) diff --git a/nemo_rl/models/megatron/converters/qwen3.py b/nemo_rl/models/megatron/converters/qwen3.py new file mode 100644 index 0000000000..1dcb278106 --- /dev/null +++ b/nemo_rl/models/megatron/converters/qwen3.py @@ -0,0 +1,89 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from nemo.lightning import io +from nemo.lightning.io.state import TransformFns + + +def get_export_mapping(config): + mapping = { + "**.self_attention.linear_proj.weight": "**.self_attn.o_proj.weight", + "**.self_attention.linear_qkv.layer_norm_weight": "**.input_layernorm.weight", + "**.self_attention.q_layernorm.weight": "**.self_attn.q_norm.weight", + "**.self_attention.k_layernorm.weight": "**.self_attn.k_norm.weight", + "decoder.final_layernorm.weight": "model.norm.weight", + } + is_moe = getattr(config, "num_experts", 0) > 0 + if is_moe: + mapping.update( + { + "**.mlp.experts.linear_fc2.weight*": "**.mlp.experts.*.down_proj.weight", + "**.mlp.router.weight": "**.mlp.gate.weight", + "**.pre_mlp_layernorm.weight": "**.post_attention_layernorm.weight", + } + ) + else: + mapping.update( + { + "**.mlp.linear_fc2.weight": "**.mlp.down_proj.weight", + "**.mlp.linear_fc1.layer_norm_weight": "**.post_attention_layernorm.weight", + } + ) + return mapping + + +def 
get_export_transforms(config): + is_moe = getattr(config, "num_experts", 0) > 0 + transforms = [ + io.state_transform( + source_key="**.self_attention.linear_qkv.weight", + target_key=( + "**.self_attn.q_proj.weight", + "**.self_attn.k_proj.weight", + "**.self_attn.v_proj.weight", + ), + fn=TransformFns.split_qkv, + ), + ( + io.state_transform( + source_key="**.mlp.linear_fc1.weight", + target_key=("**.mlp.gate_proj.weight", "**.mlp.up_proj.weight"), + fn=TransformFns.split_fc1, + ) + if not is_moe + else io.state_transform( + source_key="**.mlp.experts.linear_fc1.weight*", + target_key=( + "**.mlp.experts.*.gate_proj.weight", + "**.mlp.experts.*.up_proj.weight", + ), + fn=TransformFns.split_fc1, + ) + ), + io.state_transform( + source_key="embedding.word_embeddings.weight", + target_key="model.embed_tokens.weight", + fn=TransformFns.prune_padding, + ), + ] + if not config.tie_word_embeddings: + transforms.append( + io.state_transform( + source_key="output_layer.weight", + target_key="lm_head.weight", + fn=TransformFns.prune_padding, + ) + ) + + return transforms diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 7f35e55af6..691e1ce5b3 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -458,25 +458,15 @@ def __init__( assert model_cfg.context_parallel_size == 1, ( "Context parallel is not supported right now" ) + + ## moe-related model_cfg.expert_tensor_parallel_size = self.cfg["megatron_cfg"][ "expert_tensor_parallel_size" ] model_cfg.expert_model_parallel_size = self.cfg["megatron_cfg"][ "expert_model_parallel_size" ] - model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] - model_cfg.bf16 = self.dtype == torch.bfloat16 - model_cfg.fp16 = self.dtype == torch.float16 - if model_cfg.fp16: - assert not model_cfg.bf16, "fp16 and bf16 cannot be used together" - model_cfg.params_dtype = torch.float16 - elif 
model_cfg.bf16: - assert not model_cfg.fp16, "fp16 and bf16 cannot be used together" - model_cfg.params_dtype = torch.bfloat16 - else: - model_cfg.params_dtype = torch.float32 - model_cfg.pipeline_dtype = dtype_map[self.cfg["megatron_cfg"]["pipeline_dtype"]] - model_cfg.parallel_output = True + # Setting moe_router_dtype to higher precision (e.g. fp64) can improve numerical stability, # especially when using many experts. model_cfg.moe_router_dtype = self.cfg["megatron_cfg"]["moe_router_dtype"] @@ -493,6 +483,20 @@ def __init__( model_cfg.moe_router_bias_update_rate = self.cfg["megatron_cfg"][ "moe_router_bias_update_rate" ] + + model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] + model_cfg.bf16 = self.dtype == torch.bfloat16 + model_cfg.fp16 = self.dtype == torch.float16 + if model_cfg.fp16: + assert not model_cfg.bf16, "fp16 and bf16 cannot be used together" + model_cfg.params_dtype = torch.float16 + elif model_cfg.bf16: + assert not model_cfg.fp16, "fp16 and bf16 cannot be used together" + model_cfg.params_dtype = torch.bfloat16 + else: + model_cfg.params_dtype = torch.float32 + model_cfg.pipeline_dtype = dtype_map[self.cfg["megatron_cfg"]["pipeline_dtype"]] + model_cfg.parallel_output = True if self.cfg["megatron_cfg"]["activation_checkpointing"]: model_cfg.activations_checkpoint_granularity = "full" model_cfg.activations_checkpoint_method = "uniform" diff --git a/pyproject.toml b/pyproject.toml index a17cced3e8..62b78d6d4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,6 +172,9 @@ exclude = ''' addopts = "--durations=15 -s -rA -x" testpaths = ["tests"] python_files = "test_*.py" +markers = [ + "mcore: marks tests that require the mcore extra", +] [tool.coverage.run] concurrency = ["thread", "multiprocessing"] diff --git a/tests/unit/models/megatron/converters/test_qwen_conversion.py b/tests/unit/models/megatron/converters/test_qwen_conversion.py new file mode 100644 index 0000000000..2debebaee0 --- /dev/null +++ 
b/tests/unit/models/megatron/converters/test_qwen_conversion.py @@ -0,0 +1,284 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import socket +from contextlib import contextmanager +from tempfile import TemporaryDirectory + +import pytest +import torch +import torch.distributed as dist +from transformers import AutoConfig, AutoModelForCausalLM + + +@contextmanager +def temporary_distributed_context(): + if "MASTER_ADDR" in os.environ and "MASTER_PORT" in os.environ: + init_method = None + else: + # Find an available port dynamically + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("localhost", 0)) + addr, port = s.getsockname() + + init_method = f"tcp://{addr}:{port}" + + dist.init_process_group( + backend="gloo", init_method=init_method, world_size=1, rank=0 + ) + + from megatron.core import parallel_state + + parallel_state.initialize_model_parallel() + + from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed + + model_parallel_cuda_manual_seed(42) + + try: + yield + finally: + parallel_state.destroy_model_parallel() + dist.destroy_process_group() + + +def dummy_qwen3_megatron_moe_config(): + from nemo.collections.llm.gpt.model.qwen3 import Qwen3MoEConfig + + return Qwen3MoEConfig( + num_layers=2, + hidden_size=64, + num_attention_heads=4, + num_query_groups=2, + ffn_hidden_size=128, + moe_ffn_hidden_size=32, + num_moe_experts=2, + 
share_embeddings_and_output_weights=True, + kv_channels=16, + ) + + +def dummy_qwen3_megatron_dense_config(): + from nemo.collections.llm.gpt.model.qwen3 import Qwen3Config + + return Qwen3Config( + num_layers=2, + hidden_size=64, + num_attention_heads=4, + num_query_groups=2, + ffn_hidden_size=128, + share_embeddings_and_output_weights=False, + kv_channels=16, + ) + + +def create_dummy_hf_moe_config(): + """Create a dummy HF MoE config and save it to a temporary directory.""" + # Create a minimal HF config that matches the megatron config + hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-30B-A3B", trust_remote_code=True) + + # Update config to match our dummy megatron config + hf_config.num_hidden_layers = 2 + hf_config.hidden_size = 64 + hf_config.num_attention_heads = 4 + hf_config.num_key_value_heads = 2 + hf_config.intermediate_size = 128 + hf_config.moe_intermediate_size = 32 + hf_config.num_experts = 2 + hf_config.tie_word_embeddings = True + hf_config.head_dim = 16 + + return hf_config + + +def create_dummy_hf_dense_config(): + """Create a dummy HF dense config and save it to a temporary directory.""" + # Create a minimal HF config that matches the megatron config + hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True) + + # Update config to match our dummy megatron config + hf_config.num_hidden_layers = 2 + hf_config.hidden_size = 64 + hf_config.num_attention_heads = 4 + hf_config.num_key_value_heads = 2 + hf_config.intermediate_size = 128 + hf_config.tie_word_embeddings = False + hf_config.head_dim = 16 + + return hf_config + + +def create_model_and_converter(megatron_config, hf_config, model_name): + """Create megatron model and converter for testing.""" + + from nemo.collections.llm.gpt.model.qwen3 import Qwen3Model + + from nemo_rl.models.megatron.converters.common import MegatronToHFConverter + + # Create megatron model + model = Qwen3Model(megatron_config) + model.configure_model() + + # Create dummy HF config and save to 
temporary directory + with TemporaryDirectory() as tmp_dir: + hf_dir = os.path.join(tmp_dir, model_name) + hf_config.save_pretrained(hf_dir) + + # Create a dummy HF model to get the model class + dummy_model = AutoModelForCausalLM.from_config( + hf_config, trust_remote_code=True + ) + dummy_model.save_pretrained(hf_dir) + + original_state_dict = model.module.state_dict() + + converter = MegatronToHFConverter( + hf_model_name=hf_dir, + megatron_model=model.module, + ) + + converted_state_dict = converter.convert(original_state_dict, model.config) + + # Filter out _extra_state keys + original_state_dict = { + k: v for k, v in original_state_dict.items() if "_extra_state" not in k + } + + return original_state_dict, converted_state_dict, hf_config, model + + +def calculate_chunk_sizes(hf_config): + """Calculate chunk sizes for QKV tensor splitting.""" + q_chunk_size = hf_config.head_dim * ( + hf_config.num_attention_heads // hf_config.num_key_value_heads + ) + kv_chunk_size = hf_config.head_dim * 2 + return q_chunk_size, kv_chunk_size + + +def assert_attention_tensors_match( + original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size +): + """Assert that attention tensors match between original and converted state dicts.""" + # Check q_layernorm + torch.testing.assert_close( + original_state_dict["decoder.layers.0.self_attention.q_layernorm.weight"], + converted_state_dict["model.layers.0.self_attn.q_norm.weight"], + ) + + # Check first layer q_proj + torch.testing.assert_close( + original_state_dict["decoder.layers.0.self_attention.linear_qkv.weight"][ + :q_chunk_size + ], + converted_state_dict["model.layers.0.self_attn.q_proj.weight"][:q_chunk_size], + ) + + # Check second layer q_proj + torch.testing.assert_close( + original_state_dict["decoder.layers.1.self_attention.linear_qkv.weight"][ + (q_chunk_size + kv_chunk_size) : (2 * q_chunk_size + kv_chunk_size) + ], + converted_state_dict["model.layers.1.self_attn.q_proj.weight"][ + q_chunk_size : (2 * 
q_chunk_size) + ], + ) + + +@pytest.mark.mcore +def test_conversion_to_hf_moe(): + """Test conversion of Qwen3 MoE model to HF format.""" + with temporary_distributed_context(): + mcore_config = dummy_qwen3_megatron_moe_config() + hf_config = create_dummy_hf_moe_config() + + original_state_dict, converted_state_dict, hf_config, model = ( + create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-moe") + ) + + # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys + # taking into account the qkv merging and the merging of the up and gate projections + assert len(original_state_dict) == len(converted_state_dict) - ( + 2 * hf_config.num_hidden_layers + + (hf_config.num_hidden_layers * hf_config.num_experts) + ) + + q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) + + # Check attention tensors + assert_attention_tensors_match( + original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size + ) + + # Check MoE MLP tensors + torch.testing.assert_close( + original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ + mcore_config.moe_ffn_hidden_size : + ], + converted_state_dict["model.layers.1.mlp.experts.0.up_proj.weight"], + ) + torch.testing.assert_close( + original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ + : mcore_config.moe_ffn_hidden_size + ], + converted_state_dict["model.layers.1.mlp.experts.0.gate_proj.weight"], + ) + torch.testing.assert_close( + original_state_dict["decoder.layers.0.mlp.experts.linear_fc2.weight1"], + converted_state_dict["model.layers.0.mlp.experts.1.down_proj.weight"], + ) + + +@pytest.mark.mcore +def test_conversion_to_hf_dense(): + """Test conversion of Qwen3 dense model to HF format.""" + with temporary_distributed_context(): + mcore_config = dummy_qwen3_megatron_dense_config() + hf_config = create_dummy_hf_dense_config() + + original_state_dict, converted_state_dict, 
hf_config, model = ( + create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-dense") + ) + + # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys + # taking into account the qkv merging and the merging of the up and gate projections + assert len(original_state_dict) == len(converted_state_dict) - ( + 3 * hf_config.num_hidden_layers + ) + + q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) + + # Check attention tensors + assert_attention_tensors_match( + original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size + ) + + # Check dense MLP tensors + torch.testing.assert_close( + original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ + mcore_config.ffn_hidden_size : + ], + converted_state_dict["model.layers.1.mlp.up_proj.weight"], + ) + torch.testing.assert_close( + original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ + : mcore_config.ffn_hidden_size + ], + converted_state_dict["model.layers.1.mlp.gate_proj.weight"], + ) + torch.testing.assert_close( + original_state_dict["decoder.layers.0.mlp.linear_fc2.weight"], + converted_state_dict["model.layers.0.mlp.down_proj.weight"], + ) From 883d573ff01c64eab04446a68bfc45b61f73ed88 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Mon, 14 Jul 2025 10:55:52 -0700 Subject: [PATCH 30/59] fix: Fix megatron llama3.1-8b config (#652) Signed-off-by: ashors1 Signed-off-by: Yi-Fu Wu Co-authored-by: ashors1 Signed-off-by: Jialei Chen --- examples/configs/grpo_math_8B_megatron.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index 41ce4412fc..fc839c8239 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -34,7 +34,8 @@ policy: empty_unused_memory_level: 0 converter_type: "LlamaForCausalLM" 
tensor_model_parallel_size: 1 - pipeline_model_parallel_size: 1 + # On H100, can run with pp=1 for better performance with expandable segments (which is enabled by default) + pipeline_model_parallel_size: 2 context_parallel_size: 1 pipeline_dtype: ${policy.precision} @@ -66,7 +67,7 @@ policy: stop_strings: null vllm_cfg: tensor_parallel_size: 1 - gpu_memory_utilization: 0.95 + gpu_memory_utilization: 0.8 max_model_len: ${policy.max_total_sequence_length} cluster: From aa3cb34b790ad322c8742f4e54d9db58e086aeb4 Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Tue, 15 Jul 2025 02:43:57 +0800 Subject: [PATCH 31/59] fix: update qwen32b config (#658) Signed-off-by: Yuki Huang Signed-off-by: Terry Kong Co-authored-by: Terry Kong Signed-off-by: Jialei Chen --- README.md | 4 ++-- examples/configs/grpo_math_8B.yaml | 3 +-- ...rpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml} | 8 ++++---- ... => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml} | 8 ++++---- ... 
grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh} | 2 +- ...sh => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh} | 2 +- tests/test_suites/nightly.txt | 2 +- tests/test_suites/release.txt | 2 +- 8 files changed, 15 insertions(+), 16 deletions(-) rename examples/configs/recipes/llm/{grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml} (93%) rename examples/configs/recipes/llm/{grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml} (93%) rename tests/test_suites/llm/{grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh} (98%) rename tests/test_suites/llm/{grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh => grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh} (98%) diff --git a/README.md b/README.md index 2e59d3b191..3cd472c6ad 100644 --- a/README.md +++ b/README.md @@ -209,14 +209,14 @@ The required `CONTAINER` can be built by following the instructions in the [Dock This section outlines how to run GRPO for Qwen2.5-32B with a 16k sequence length. 
```sh # Run from the root of NeMo RL repo -NUM_ACTOR_NODES=16 +NUM_ACTOR_NODES=32 # Download Qwen before the job starts to avoid spending time downloading during the training loop HF_HOME=/path/to/hf_home huggingface-cli download Qwen/Qwen2.5-32B # Ensure HF_HOME is included in your MOUNTS HF_HOME=/path/to/hf_home \ -COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True policy.dynamic_batching.train_mb_tokens=16384 policy.dynamic_batching.logprob_mb_tokens=32768 checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \ +COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \ CONTAINER=YOUR_CONTAINER \ MOUNTS="$PWD:$PWD" \ sbatch \ diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index a857b08858..6a958957c4 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -22,8 +22,7 @@ policy: enabled: True dynamic_batching: - train_mb_tokens: 4096 - logprob_mb_tokens: 8192 + enabled: True optimizer: name: "torch.optim.AdamW" diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml 
b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml similarity index 93% rename from examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml rename to examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml index 2a1a151ea5..15665e9af8 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -19,7 +19,7 @@ loss_fn: token_level_loss: true checkpointing: enabled: true - checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long metric_name: val_reward higher_is_better: true keep_top_k: 3 @@ -105,18 +105,18 @@ env: math: num_workers: 8 logger: - log_dir: logs/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true monitor_gpus: true wandb: project: nemo-rl - name: grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long + name: grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long tensorboard: {} gpu_monitoring: collection_interval: 10 flush_interval: 10 cluster: gpus_per_node: 8 - num_nodes: 16 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml similarity index 93% rename from examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml rename to examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml index 06ae6b4637..851ed41ef1 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -19,7 +19,7 @@ loss_fn: token_level_loss: true checkpointing: enabled: true 
- checkpoint_dir: results/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt metric_name: val_reward higher_is_better: true keep_top_k: 3 @@ -105,18 +105,18 @@ env: math: num_workers: 8 logger: - log_dir: logs/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true monitor_gpus: true wandb: project: nemo-rl - name: grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt + name: grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt tensorboard: {} gpu_monitoring: collection_interval: 10 flush_interval: 10 cluster: gpus_per_node: 8 - num_nodes: 16 + num_nodes: 32 diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh similarity index 98% rename from tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh rename to tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh index 69c9899ccd..7d1fdc2858 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh @@ -3,7 +3,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) source $SCRIPT_DIR/common.env # ===== BEGIN CONFIG ===== -NUM_NODES=16 +NUM_NODES=32 STEPS_PER_RUN=10 MAX_STEPS=20 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh similarity index 98% rename from tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh rename to tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh index ccdef1b2bd..e96f3de7fa 100755 --- 
a/tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh @@ -3,7 +3,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) source $SCRIPT_DIR/common.env # ===== BEGIN CONFIG ===== -NUM_NODES=16 +NUM_NODES=32 STEPS_PER_RUN=2 # 40min: step_time: [1341, 801] MAX_STEPS=2 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt index 59503c10be..d28e61a8e6 100644 --- a/tests/test_suites/nightly.txt +++ b/tests/test_suites/nightly.txt @@ -11,7 +11,7 @@ tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh # Functional 32b run -tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt.v3.sh +tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh ####### # SFT # diff --git a/tests/test_suites/release.txt b/tests/test_suites/release.txt index 2304100ff9..e339ef0bc1 100644 --- a/tests/test_suites/release.txt +++ b/tests/test_suites/release.txt @@ -6,7 +6,7 @@ tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh # Long 32b run -tests/test_suites/llm/grpo-qwen2.5-32b-16n8g-fsdp2tp8sp-actckpt-long.v3.sh +tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh # Long Gemma3 27b run tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh From a27ab0078e6c4917918091b61e478379274be528 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Mon, 14 Jul 2025 11:48:31 -0700 Subject: [PATCH 32/59] fix: Make trust_remote_code default true in checkpoint (#663) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- nemo_rl/utils/native_checkpoint.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nemo_rl/utils/native_checkpoint.py b/nemo_rl/utils/native_checkpoint.py index 43d511bd74..8d21aae7d3 100644 --- 
a/nemo_rl/utils/native_checkpoint.py +++ b/nemo_rl/utils/native_checkpoint.py @@ -244,7 +244,7 @@ def convert_dcp_to_hf( ) torch.save(state_dict["model"], weights_path) - config = AutoConfig.from_pretrained(model_name_or_path) + config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) config.save_pretrained(hf_ckpt_path) # TODO: After the following PR gets merged: @@ -253,7 +253,9 @@ def convert_dcp_to_hf( # We can expose a arg at the top level --tokenizer_path to plumb that through. # This is more stable than relying on the current NeMo-RL get_tokenizer() which can # change release to release. - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path) + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_name_or_path, trust_remote_code=True + ) tokenizer.save_pretrained(hf_ckpt_path) return hf_ckpt_path From c06c264d2b1553c85f06a1ad0a7b5efe8199ac0f Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 14 Jul 2025 11:59:47 -0700 Subject: [PATCH 33/59] feat: add script to redact hparam paths from tensorboard logs (#347) Signed-off-by: Terry Kong Signed-off-by: Jialei Chen --- .../tools/test_copy_tbevent_maybe_redact.py | 330 ++++++++++++++ tools/__init__.py | 0 tools/copy_tbevent_maybe_redact.py | 407 ++++++++++++++++++ tools/package_release_runs.sh | 19 +- 4 files changed, 749 insertions(+), 7 deletions(-) create mode 100644 tests/unit/tools/test_copy_tbevent_maybe_redact.py create mode 100644 tools/__init__.py create mode 100644 tools/copy_tbevent_maybe_redact.py diff --git a/tests/unit/tools/test_copy_tbevent_maybe_redact.py b/tests/unit/tools/test_copy_tbevent_maybe_redact.py new file mode 100644 index 0000000000..ea16d7e4cf --- /dev/null +++ b/tests/unit/tools/test_copy_tbevent_maybe_redact.py @@ -0,0 +1,330 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import filecmp +import glob +import os +import sys +import tempfile +from unittest.mock import patch + +import pytest + +# Make sure the script can be imported +script_dir = os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..", "..", "tools") +) +sys.path.insert(0, script_dir) + +import copy_tbevent_maybe_redact as script_under_test + +# Needed for reading back and verifying hparams +from tensorboard.backend.event_processing import event_accumulator +from tensorboard.plugins.hparams import plugin_data_pb2 +from torch.utils.tensorboard import SummaryWriter + + +def extract_hparams_from_event_file(file_path: str) -> dict | None: + """Extracts HParams dictionary from a single TensorBoard event file.""" + try: + ea = event_accumulator.EventAccumulator( + file_path, + size_guidance=script_under_test.SIZE_GUIDANCE_META, # Use defined size guidance + ) + ea.Reload() # Load the events + + # Check for HParams using the specific tag and plugin name + if script_under_test.HPARAMS_TAG in ea.summary_metadata: + metadata = ea.summary_metadata[script_under_test.HPARAMS_TAG] + if ( + metadata.plugin_data + and metadata.plugin_data.plugin_name + == script_under_test.HPARAMS_PLUGIN_NAME + ): + try: + plugin_data_proto = plugin_data_pb2.HParamsPluginData.FromString( + metadata.plugin_data.content + ) + if plugin_data_proto.HasField("session_start_info"): + # Convert the protobuf map to a standard Python dict + hparams_dict = {} + for ( + key, + hparam_value, + ) in plugin_data_proto.session_start_info.hparams.items(): + if 
hparam_value.HasField("string_value"): + hparams_dict[key] = hparam_value.string_value + elif hparam_value.HasField("number_value"): + hparams_dict[key] = ( + hparam_value.number_value + ) # Store as float/int + elif hparam_value.HasField("bool_value"): + hparams_dict[key] = hparam_value.bool_value + return hparams_dict + else: + print( + f"Warning: HParams data in {file_path} missing 'session_start_info' field.", + file=sys.stderr, + ) + except Exception as e: + print( + f"Warning: Failed to parse HParamsPluginData from {file_path}: {e}", + file=sys.stderr, + ) + return None # Indicate parsing failure + else: + print( + f"Warning: HParams tag '{script_under_test.HPARAMS_TAG}' found in {file_path}, but plugin data is missing or not for HParams plugin.", + file=sys.stderr, + ) + else: + # This is normal if the file doesn't contain HParams with the specific tag + pass # print(f"Debug: No HParams tag '{script_under_test.HPARAMS_TAG}' found in summary metadata for {file_path}.") + except Exception as e: + print( + f"Warning: Failed to load or process event file {file_path} for HParams extraction: {e}", + file=sys.stderr, + ) + return None # Indicate loading failure + + return None # No HParams found or other issue + + +def create_tfevents_file(file_dir: str, actions: list[tuple]) -> list[str]: + """Creates tfevents file(s) using SummaryWriter based on a list of actions. + Writes files into a subdirectory within file_dir based on SummaryWriter's default naming. + Returns a list of paths to the generated event files. 
+ """ + # SummaryWriter will create a subdirectory inside file_dir + writer = SummaryWriter(log_dir=file_dir) + writer_log_dir = writer.log_dir # Capture the actual subdirectory path + + hparams_added = False + + for action in actions: + action_type = action[0] + if action_type == "scalar": + _, tag, value, step = action + writer.add_scalar(tag, value, step) + elif action_type == "hparams": + _, hparams_dict, metrics_dict = action + writer.add_hparams( + hparams_dict, metrics_dict + ) # Pass metrics_dict as required + hparams_added = True + else: + raise ValueError(f"Unknown action type: {action_type}") + + writer.flush() + if hparams_added: + import time + + time.sleep(0.1) + writer.close() + + # Find the event file created *inside* the writer's log_dir (recursively) + generated_files = list( + glob.glob( + os.path.join(writer_log_dir, "**", "events.out.tfevents.*"), recursive=True + ) + ) + + if not generated_files: + raise FileNotFoundError( + f"Could not find generated tfevents file in {writer_log_dir}" + ) + + return generated_files + + +@pytest.fixture +def temp_dir(): + """Pytest fixture for creating a temporary directory.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield tmpdir + + +def test_copy_passthrough_no_hparams(temp_dir): + """Case 1: Test file copy when no HParams are present.""" + base_output_name = "output_no_hparams" + actions = [ + ("scalar", "loss", 0.5, 1), + ("scalar", "accuracy", 0.9, 1), + ("scalar", "loss", 0.4, 2), + ] + # create_tfevents_file returns list[str], no longer needs base_file_name + input_file_paths = create_tfevents_file(temp_dir, actions) + assert input_file_paths, "Event file creation failed or no files found" + + for idx, input_path in enumerate(input_file_paths): + print(f"Processing file {idx + 1}/{len(input_file_paths)}: {input_path}") + output_name = f"{base_output_name}_{idx}.tfevents" + output_path = os.path.join(temp_dir, output_name) + + # Check redaction need for this specific file (should be False) + 
needs_redact = script_under_test.check_needs_redaction(input_path) + assert not needs_redact, f"File {input_path} unexpectedly requires redaction." + + # Run the main logic (mocking args) + test_args = ["script_name", input_path, output_path] + with patch.object(sys, "argv", test_args): + script_under_test.main() + + # Verify output file exists and is identical to input + assert os.path.exists(output_path), f"Output file {output_path} not found." + assert filecmp.cmp(input_path, output_path, shallow=False), ( + f"File {input_path} and {output_path} differ, but no redaction was expected." + ) + + +def test_copy_passthrough_hparams_no_paths(temp_dir): + """Case 2: Test file copy when HParams exist but contain no paths.""" + base_output_name = "output_hparams_no_paths" + hparams = { + "learning_rate": 0.001, + "optimizer": "Adam", + "use_gpu": True, + "model_name": "my_model_v1", + } + metrics_dict = {"hparam/metric": 0} + actions = [ + ("scalar", "loss", 0.4, 1), + ("hparams", hparams, metrics_dict), # Add hparams + ("scalar", "loss", 0.3, 2), + ] + # create_tfevents_file returns list[str], no longer needs base_file_name + input_file_paths = create_tfevents_file(temp_dir, actions) + assert input_file_paths, "Event file creation failed or no files found" + + for idx, input_path in enumerate(input_file_paths): + print(f"Processing file {idx + 1}/{len(input_file_paths)}: {input_path}") + output_name = f"{base_output_name}_{idx}.tfevents" + output_path = os.path.join(temp_dir, output_name) + + # Check redaction need for this specific file (should be False) + needs_redact = script_under_test.check_needs_redaction(input_path) + assert not needs_redact, ( + f"File {input_path} unexpectedly requires redaction (hparams exist but no paths)." 
+ ) + + # Run the main logic + test_args = ["script_name", input_path, output_path] + with patch.object(sys, "argv", test_args): + script_under_test.main() + + # Verify output file exists and is identical to input + assert os.path.exists(output_path), f"Output file {output_path} not found." + assert filecmp.cmp(input_path, output_path, shallow=False), ( + f"File {input_path} and {output_path} differ, but no redaction was expected (hparams no paths)." + ) + + +def test_copy_and_redact_paths(temp_dir): + """Case 3: Test file copy with redaction of HParam paths.""" + base_output_name = "output_hparams_redacted" + hparams = { + "config_file": "/absolute/path/to/config.yaml", # Needs redaction + "dataset_path": "/data/my_dataset", # Needs redaction + "learning_rate": 0.01, # No redaction + "relative_path": "relative/model.ckpt", # No redaction + "empty_path": "", # No redaction + } + metrics_dict = {"hparam/accuracy": 0.95} + actions = [ + ("scalar", "val_loss", 1.5, 1), + ("hparams", hparams, metrics_dict), # Add hparams + ] + # create_tfevents_file returns list[str], no longer needs base_file_name + input_file_paths = create_tfevents_file(temp_dir, actions) + assert input_file_paths, "Event file creation failed or no files found" + + # Keep track if we found at least one file that needed redaction + at_least_one_file_needed_redaction = False + + for idx, input_path in enumerate(input_file_paths): + # breakpoint() + print(f"Processing file {idx + 1}/{len(input_file_paths)}: {input_path}") + output_name = f"{base_output_name}_{idx}.tfevents" + output_path = os.path.join(temp_dir, output_name) + + # Check if this specific file needs redaction + needs_redact = script_under_test.check_needs_redaction(input_path) + if needs_redact: + at_least_one_file_needed_redaction = True + print(f" File {input_path} requires redaction.") + else: + print(f" File {input_path} does not require redaction.") + + # Run the main logic + test_args = ["script_name", input_path, output_path] + 
with patch.object(sys, "argv", test_args): + script_under_test.main() + + # Verify output file exists + assert os.path.exists(output_path), f"Output file {output_path} not found." + + # Perform verification based on whether this specific file needed redaction + if needs_redact: + assert not filecmp.cmp(input_path, output_path, shallow=False), ( + f"File {input_path} and {output_path} are identical, but redaction was expected." + ) + + # --- Verification using EventAccumulator via helper --- + # Check if output_path is a directory before trying to load it + if os.path.isdir(output_path): + print( + f"Error: Output path {output_path} is a directory before HParams extraction! Contents: {os.listdir(output_path)}", + file=sys.stderr, + ) + pytest.fail(f"Output path {output_path} is a directory, not a file.") + else: + print( + f"Debug: Output path {output_path} is a file, proceeding with HParams extraction." + ) + + extracted_redacted_hparams = extract_hparams_from_event_file(output_path) + # breakpoint() + assert extracted_redacted_hparams is not None, ( + f"HParams could not be extracted from the redacted output file: {output_path}" + ) + + # Check redacted values + assert "config_file" in extracted_redacted_hparams + assert ( + extracted_redacted_hparams["config_file"] + == script_under_test.REDACTED_VALUE + ) + assert "dataset_path" in extracted_redacted_hparams + assert ( + extracted_redacted_hparams["dataset_path"] + == script_under_test.REDACTED_VALUE + ) + # Check non-redacted values + assert "learning_rate" in extracted_redacted_hparams + assert extracted_redacted_hparams["learning_rate"] == pytest.approx(0.01) + assert "relative_path" in extracted_redacted_hparams + assert extracted_redacted_hparams["relative_path"] == "relative/model.ckpt" + assert "empty_path" in extracted_redacted_hparams + assert extracted_redacted_hparams["empty_path"] == "" + else: + # If this specific file (e.g., potentially one with only scalars if SW split them) + # didn't need 
redaction, it should be identical. + assert filecmp.cmp(input_path, output_path, shallow=False), ( + f"File {input_path} and {output_path} differ, but no redaction was needed for this specific file." + ) + + # After processing all files, assert that at least one file actually contained + # the HParams needing redaction, otherwise the test case setup is faulty. + assert at_least_one_file_needed_redaction, ( + "Test setup error: No input file requiring redaction was found among generated files." + ) diff --git a/tools/__init__.py b/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/copy_tbevent_maybe_redact.py b/tools/copy_tbevent_maybe_redact.py new file mode 100644 index 0000000000..6815db2559 --- /dev/null +++ b/tools/copy_tbevent_maybe_redact.py @@ -0,0 +1,407 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import argparse +import glob +import os +import shutil +import sys +import tempfile +from typing import Dict, Set # Import necessary types + +from tensorboard import errors as tb_errors +from tensorboard.backend.event_processing import event_accumulator +from tensorboard.backend.event_processing.event_file_loader import EventFileLoader +from tensorboard.compat.proto import event_pb2, summary_pb2 +from tensorboard.plugins.hparams import ( + api_pb2, # Import the api_pb2 module + plugin_data_pb2, +) +from tensorboard.summary.writer.event_file_writer import EventFileWriter + +# By default TB tries to be smart about what to load in memory to avoid OOM +# Since we expect every step to be there when we do our comparisons, we explicitly +# set the size guidance to 0 so that we load everything. It's okay given our tests +# are small/short. +SIZE_GUIDANCE_META: Dict[str, int | bool] = { + event_accumulator.SCALARS: 0, + event_accumulator.IMAGES: 0, + event_accumulator.AUDIO: 0, + event_accumulator.HISTOGRAMS: 0, + event_accumulator.TENSORS: 0, + event_accumulator.GRAPH: False, + event_accumulator.RUN_METADATA: False, +} + +HPARAMS_TAG: str = "_hparams_/session_start_info" +HPARAMS_PLUGIN_NAME: str = "hparams" +REDACTED_VALUE: str = "[REDACTED]" + + +def check_needs_redaction(input_path: str) -> bool: + """Checks if the TensorBoard event file contains HParams with string values starting with '/'. + + Returns True if redaction is needed, False otherwise. + Uses EventAccumulator with minimal size guidance for speed. 
+ """ + try: + print(f"Checking for HParams needing redaction in: {input_path}") + ea = event_accumulator.EventAccumulator( + input_path, size_guidance=SIZE_GUIDANCE_META + ) + ea.Reload() + + if HPARAMS_TAG in ea.summary_metadata: + metadata = ea.summary_metadata[HPARAMS_TAG] + if ( + metadata.plugin_data + and metadata.plugin_data.plugin_name == HPARAMS_PLUGIN_NAME + and metadata.plugin_data.content + ): + try: + plugin_data: plugin_data_pb2.HParamsPluginData = ( + plugin_data_pb2.HParamsPluginData.FromString( + metadata.plugin_data.content + ) + ) + if plugin_data.HasField("session_start_info"): + hparams_map: Dict[str, api_pb2.HParamValue] = ( + plugin_data.session_start_info.hparams + ) + # Check VALUES now, not keys + for key, hparam_value in hparams_map.items(): + # Check if it's a string value and starts with '/' + if hparam_value.HasField( + "string_value" + ) and hparam_value.string_value.startswith("/"): + print( + f" Found HParam needing redaction: key='{key}', value='{hparam_value.string_value}'" + ) + return True + print( + " Found HParams, but no string values start with '/'. No redaction needed." + ) + return False + else: + print( + " HParams tag metadata found, but no session_start_info field. Assuming no redaction needed." + ) + return False + except Exception as parse_err: + print( + f"Warning: Error parsing HParams plugin data: {parse_err}. Assuming no redaction needed.", + file=sys.stderr, + ) + return False + else: + print( + " Found HParams tag metadata, but plugin data is missing, invalid, or empty. Assuming no redaction needed." + ) + return False + else: + print( + f" No HParams tag ('{HPARAMS_TAG}') found in summary metadata. No redaction needed." + ) + return False + except (tb_errors.CorruptEventFileError, tb_errors.DataLossError, Exception) as e: + print( + f"Warning: Error reading or processing event file for redaction check: {e}. 
Assuming no redaction needed.", + file=sys.stderr, + ) + return False + + +def redact_hparams_and_write(input_path: str, output_path: str) -> None: + """Reads events from input_path, redacts hparam string values that start with '/'. + + Writes all events to output_path using TensorBoard's native utilities. + """ + print(f"Redacting HParams from '{input_path}' and writing to '{output_path}'") + redacted_count: int = 0 + event_count: int = 0 + hparam_event_found: bool = False + hparam_event_modified: bool = False + + parent_dir = os.path.dirname(output_path) + if not parent_dir: + parent_dir = "." + os.makedirs(parent_dir, exist_ok=True) + + writer = None # Initialize for the finally block + with tempfile.TemporaryDirectory( + dir=parent_dir, prefix=".writer_temp_" + ) as temp_writer_dir: + try: + writer = EventFileWriter(temp_writer_dir) + + loader = EventFileLoader(input_path) + for event in loader.Load(): + event_count += 1 + new_event = event_pb2.Event() + new_event.CopyFrom(event) + + is_hparam_event: bool = False + summary_value_index: int = -1 + if event.HasField("summary"): + for i, value in enumerate(event.summary.value): + if ( + value.tag == HPARAMS_TAG + and value.metadata + and value.metadata.plugin_data + and value.metadata.plugin_data.plugin_name + == HPARAMS_PLUGIN_NAME + and value.metadata.plugin_data.content + ): + is_hparam_event = True + summary_value_index = i + hparam_event_found = True + break + + if is_hparam_event: + print(f" Processing HParams event (Event #{event_count})...") + original_value: summary_pb2.Summary.Value = event.summary.value[ + summary_value_index + ] + original_metadata: summary_pb2.SummaryMetadata = ( + original_value.metadata + ) + original_plugin_content: bytes = ( + original_metadata.plugin_data.content + ) + + try: # Keep this try-except for parsing individual HParam events + plugin_data_proto: plugin_data_pb2.HParamsPluginData = ( + plugin_data_pb2.HParamsPluginData.FromString( + original_plugin_content + ) + ) + if 
plugin_data_proto.HasField("session_start_info"): + keys_to_redact: Set[str] = set() + hparams_map: Dict[str, api_pb2.HParamValue] = ( + plugin_data_proto.session_start_info.hparams + ) + for key, hparam_value_obj in hparams_map.items(): + if hparam_value_obj.HasField( + "string_value" + ) and hparam_value_obj.string_value.startswith("/"): + keys_to_redact.add(key) + + if keys_to_redact: + hparam_event_modified = True + print( + f" Redacting values for keys: {list(keys_to_redact)}" + ) + new_plugin_data_obj: plugin_data_pb2.HParamsPluginData = plugin_data_pb2.HParamsPluginData() + new_plugin_data_obj.CopyFrom(plugin_data_proto) + + for key_to_modify in keys_to_redact: + if ( + key_to_modify + in new_plugin_data_obj.session_start_info.hparams + ): + hparam_value_entry = new_plugin_data_obj.session_start_info.hparams[ + key_to_modify + ] + hparam_value_entry.string_value = REDACTED_VALUE + redacted_count += 1 + else: + print( + f"Warning: Key '{key_to_modify}' for redaction not found in copied hparams map.", + file=sys.stderr, + ) + + new_plugin_data_content: bytes = ( + new_plugin_data_obj.SerializeToString() + ) + new_summary_metadata = summary_pb2.SummaryMetadata() + new_summary_metadata.CopyFrom(original_metadata) + new_summary_metadata.plugin_data.content = ( + new_plugin_data_content + ) + + new_summary_value = summary_pb2.Summary.Value() + new_summary_value.CopyFrom(original_value) + new_summary_value.metadata.CopyFrom( + new_summary_metadata + ) + new_event.summary.value[summary_value_index].CopyFrom( + new_summary_value + ) + else: + print( + " HParams event found, but no values required redaction." + ) + else: + print( + " HParams event tag found, but no session_start_info field." + ) + except ( + Exception + ) as parse_err: # This handles parsing of a single HParam event + print( + f"Warning: Error parsing HParams plugin data for event #{event_count}: {parse_err}. 
Skipping modification for this event.", + file=sys.stderr, + ) + new_event.CopyFrom(event) + + writer.add_event(new_event) + + writer.close() # Close writer after loop + writer = None # Indicate it's closed + + written_files = list( + glob.glob(os.path.join(temp_writer_dir, "events.out.tfevents.*")) + ) + if not written_files: + written_files = list(glob.glob(os.path.join(temp_writer_dir, "*"))) + written_files = [ + f + for f in written_files + if os.path.isfile(f) and ".tfevents" in f.lower() + ] + + if len(written_files) == 1: + writer_internal_file_path = written_files[0] + print(f"EventFileWriter created: {writer_internal_file_path}") + if os.path.exists(output_path): + if os.path.isdir(output_path): + print( + f"Removing existing directory at output path: {output_path}" + ) + shutil.rmtree(output_path) + else: + print(f"Removing existing file at output path: {output_path}") + os.remove(output_path) + print(f"Moving '{writer_internal_file_path}' to '{output_path}'") + shutil.move(writer_internal_file_path, output_path) + elif len(written_files) > 1: + raise IOError( + f"Ambiguous output from EventFileWriter in {temp_writer_dir}: {written_files}" + ) + else: + raise IOError( + f"EventFileWriter failed to create an event file in {temp_writer_dir}" + ) + + except (tb_errors.CorruptEventFileError, tb_errors.DataLossError) as e: + print( + f"Error: Data integrity issue with input file '{input_path}'. File may be truncated or corrupted.", + file=sys.stderr, + ) + print(f"Specific error: {e}", file=sys.stderr) + if os.path.exists(output_path): + try: + shutil.rmtree(output_path) if os.path.isdir( + output_path + ) else os.remove(output_path) + except OSError: + pass + sys.exit(1) + # No general except Exception here, let it propagate to main() if it's not tb_errors + # OR, if we want to catch all from this block and clean up output_path, add it. + # For now, assuming other errors are unexpected and should fail loudly. 
+ finally: + if ( + writer is not None + ): # If writer was initialized but loop/move failed before explicit close + print( + "Ensuring writer is closed in finally block due to an earlier error.", + file=sys.stderr, + ) + writer.close() + + print(f"Finished processing {event_count} events.") + if hparam_event_found: + if hparam_event_modified: + print(f"Redacted {redacted_count} HParam values.") + else: + print("HParams event found, but no values required redaction.") + else: + print(f"No HParams event ('{HPARAMS_TAG}') was found during file iteration.") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Copy a TensorBoard event file, redacting HParam string values that start with '/'. " + "If no HParams need redaction, performs a simple file copy.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "input_path", + type=str, + help="Path to the input TensorBoard event file (tfevents*).", + ) + parser.add_argument( + "output_path", + type=str, + help="Path to write the output (potentially redacted) event file.", + ) + args: argparse.Namespace = parser.parse_args() + + input_f: str = args.input_path + output_f: str = args.output_path + + if not os.path.exists(input_f): + print(f"Error: Input file not found: '{input_f}'", file=sys.stderr) + sys.exit(1) + if not os.path.isfile(input_f): + print( + f"Error: Input path must be a file, not a directory: '{input_f}'", + file=sys.stderr, + ) + sys.exit(1) + if os.path.abspath(input_f) == os.path.abspath(output_f): + print( + f"Error: Input and output paths cannot be the same: '{input_f}'", + file=sys.stderr, + ) + sys.exit(1) + + needs_redact: bool = check_needs_redaction(input_f) + + if needs_redact: + try: + redact_hparams_and_write(input_f, output_f) + print(f"Successfully created redacted file: '{output_f}'") + except Exception as e: # Generalize exception for now + print(f"Failed to create redacted file due to error: {e}", file=sys.stderr) + # Attempt to 
clean up potentially bad output file/dir + if os.path.exists(output_f): + try: + if os.path.isdir(output_f): + shutil.rmtree(output_f) # Remove dir if it became one + print(f"Cleaned up directory: '{output_f}'") + else: + os.remove(output_f) # Remove file + print(f"Cleaned up file: '{output_f}'") + except OSError as rm_err: + print( + f"Error during cleanup of '{output_f}': {rm_err}", + file=sys.stderr, + ) + sys.exit(1) + else: + print(f"No redaction needed. Copying '{input_f}' to '{output_f}'...") + try: + output_dir: str = os.path.dirname(output_f) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + shutil.copy2(input_f, output_f) + print(f"Successfully copied file: '{output_f}'") + except Exception as e: + print(f"Error during file copy: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tools/package_release_runs.sh b/tools/package_release_runs.sh index 357c9ad618..e4665f6b99 100755 --- a/tools/package_release_runs.sh +++ b/tools/package_release_runs.sh @@ -15,10 +15,10 @@ shopt -s globstar OUTPUT_TAR="release_runs-$(git rev-parse --short HEAD).tar.gz" -TB_EVENTS=$(ls code_snapshots/*/tests/test_suites/**/logs/*/tensorboard/events* || true) +TB_EVENTS=$(ls code_snapshots/*/tests/test_suites/**/logs/*/tensorboard/**/events* || true) # Check if the glob expanded to any files -if [ -z "$TB_EVENTS" ]; then +if [[ -z "$TB_EVENTS" ]]; then echo "Error: No tensorboard event files found matching the pattern." exit 1 elif [[ -f $OUTPUT_TAR ]]; then @@ -35,17 +35,22 @@ trap "echo 'Cleaning up temporary directory $TMP_DIR'; rm -rf $TMP_DIR" EXIT # Loop over all the recipe runs and package them into a tarball for tbevent in $TB_EVENTS; do exp_name=$(basename -- $(cut -d/ -f2 <<<$tbevent) -logs) - # Obfuscate the hostname + # Redact the hostname # events.out.tfevents.1744822578..780899.0 - obfuscated_event_path=$(basename $tbevent | awk -F. 
'{print $1"."$2"."$3"."$4".HOSTNAME."$(NF-1)"."$NF}') + redacted_event_path=$(basename $tbevent | awk -F. '{print $1"."$2"."$3"."$4".HOSTNAME."$(NF-1)"."$NF}') # Create subdirectory for experiment if it doesn't exist mkdir -p "$TMP_DIR/$exp_name" + + if [[ -f "$TMP_DIR/$exp_name/$redacted_event_path" ]]; then + echo "Error: $redacted_event_path already exists. This is unusual since tensorboard usually suffixes event files with a unique number. Please investigate." + exit 1 + fi - # Copy the event file with obfuscated name to the experiment subdirectory - cp "$tbevent" "$TMP_DIR/$exp_name/$obfuscated_event_path" + # Copy the event file with redacted hostname to the experiment subdirectory + uv run --with tensorboard --no-project tools/copy_tbevent_maybe_redact.py "$tbevent" "$TMP_DIR/$exp_name/$redacted_event_path" - echo "[$exp_name] Copied $tbevent to $TMP_DIR/$exp_name/$obfuscated_event_path" + echo "[$exp_name] Copied $tbevent to $TMP_DIR/$exp_name/$redacted_event_path" done # Create a tarball of all the processed event files From 28aae4a6afd6ea04c7e5276b104fdacece6de03c Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Mon, 14 Jul 2025 21:32:43 -0700 Subject: [PATCH 34/59] test: add a unit test that verifies that the correct keys are present in configs (#587) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- nemo_rl/algorithms/grpo.py | 1 - nemo_rl/data/__init__.py | 19 ++- nemo_rl/models/policy/__init__.py | 36 +++-- nemo_rl/utils/checkpoint.py | 4 +- nemo_rl/utils/logger.py | 7 +- tests/unit/test_config_validation.py | 225 +++++++++++++++++++++++++++ 6 files changed, 261 insertions(+), 31 deletions(-) create mode 100644 tests/unit/test_config_validation.py diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 4bbe0cf52b..d33e503636 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -83,7 +83,6 @@ class GRPOConfig(TypedDict): val_batch_size: int val_at_start: bool max_val_samples: int - checkpoint_dir: str class 
GRPOSaveState(TypedDict): diff --git a/nemo_rl/data/__init__.py b/nemo_rl/data/__init__.py index 31b40fa995..9a9ce4b23a 100644 --- a/nemo_rl/data/__init__.py +++ b/nemo_rl/data/__init__.py @@ -12,19 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional, TypedDict +from typing import NotRequired, TypedDict class DataConfig(TypedDict): max_input_seq_length: int - prompt_file: str - system_prompt_file: Optional[str] + prompt_file: NotRequired[str] + system_prompt_file: NotRequired[str] dataset_name: str - val_dataset_name: Optional[str] - add_bos: Optional[bool] - add_eos: Optional[bool] - input_key: Optional[str] - output_key: Optional[str] + val_dataset_name: NotRequired[str] + add_bos: NotRequired[bool] + add_eos: NotRequired[bool] + input_key: NotRequired[str] + output_key: NotRequired[str] + add_generation_prompt: NotRequired[bool] + add_system_prompt: NotRequired[bool] + split: NotRequired[str] class MathDataConfig(DataConfig): diff --git a/nemo_rl/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py index b04ff2d159..384a8bf5a5 100644 --- a/nemo_rl/models/policy/__init__.py +++ b/nemo_rl/models/policy/__init__.py @@ -12,19 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, NotRequired, Optional, TypedDict, Union +from typing import Any, NotRequired, TypedDict, Union from nemo_rl.models.generation.interfaces import GenerationConfig class DTensorConfig(TypedDict): enabled: bool - cpu_offload: bool - sequence_parallel: bool - activation_checkpointing: bool - tensor_parallel_size: int - context_parallel_size: int - custom_parallel_plan: str + cpu_offload: NotRequired[bool] + sequence_parallel: NotRequired[bool] + activation_checkpointing: NotRequired[bool] + tensor_parallel_size: NotRequired[int] + context_parallel_size: NotRequired[int] + custom_parallel_plan: NotRequired[str] class SequencePackingConfig(TypedDict): @@ -93,7 +93,7 @@ class MegatronConfig(TypedDict): class TokenizerConfig(TypedDict): name: str - chat_template: str + chat_template: NotRequired[str] class PytorchOptimizerConfig(TypedDict): @@ -116,9 +116,11 @@ class DynamicBatchingConfig(TypedDict): # amount of tokens is approximately close to 'train_mb_tokens' and 'logprob_mb_tokens' for the # training and logprob stages respectively. 
enabled: bool - train_mb_tokens: int - logprob_mb_tokens: int - sequence_length_round: int + + ## required if enabled is true + train_mb_tokens: NotRequired[int] + logprob_mb_tokens: NotRequired[int] + sequence_length_round: NotRequired[int] class PolicyConfig(TypedDict): @@ -126,22 +128,22 @@ class PolicyConfig(TypedDict): tokenizer: TokenizerConfig train_global_batch_size: int train_micro_batch_size: int - learning_rate: float - logprob_batch_size: int - generation: Optional[GenerationConfig] + logprob_batch_size: NotRequired[int] + generation: NotRequired[GenerationConfig] generation_batch_size: NotRequired[ int ] # used in static batched (framework) generation precision: str dtensor_cfg: DTensorConfig - megatron_cfg: MegatronConfig + megatron_cfg: NotRequired[MegatronConfig] dynamic_batching: DynamicBatchingConfig - sequence_packing: SequencePackingConfig + sequence_packing: NotRequired[SequencePackingConfig] make_sequence_length_divisible_by: int max_total_sequence_length: int - max_grad_norm: Optional[Union[float, int]] + max_grad_norm: NotRequired[Union[float, int]] fsdp_offload_enabled: bool activation_checkpointing_enabled: bool + refit_buffer_size_gb: NotRequired[float] optimizer: NotRequired[PytorchOptimizerConfig] = None scheduler: NotRequired[list[SinglePytorchSchedulerConfig] | SchedulerMilestones] = ( None diff --git a/nemo_rl/utils/checkpoint.py b/nemo_rl/utils/checkpoint.py index 1be5948ba3..ebc276bba6 100644 --- a/nemo_rl/utils/checkpoint.py +++ b/nemo_rl/utils/checkpoint.py @@ -23,7 +23,7 @@ import shutil import warnings from pathlib import Path -from typing import Any, Optional, TypedDict, Union +from typing import Any, NotRequired, Optional, TypedDict, Union import numpy as np import torch @@ -48,7 +48,7 @@ class CheckpointingConfig(TypedDict): metric_name: str higher_is_better: bool save_period: int - keep_top_k: Optional[int] + keep_top_k: NotRequired[int] class CheckpointManager: diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py 
index 0a83204ee6..c039ecd939 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -36,6 +36,7 @@ from rich.logging import RichHandler from rich.panel import Panel from torch.utils.tensorboard import SummaryWriter +from typing_extensions import NotRequired from nemo_rl.data.interfaces import LLMMessageLogType from nemo_rl.distributed.batched_data_dict import BatchedDataDict @@ -45,12 +46,12 @@ class WandbConfig(TypedDict): - project: str - name: str + project: NotRequired[str] + name: NotRequired[str] class TensorboardConfig(TypedDict): - log_dir: str + log_dir: NotRequired[str] class GPUMonitoringConfig(TypedDict): diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py new file mode 100644 index 0000000000..3056ae270d --- /dev/null +++ b/tests/unit/test_config_validation.py @@ -0,0 +1,225 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import glob +import os +import warnings +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints + +import pytest +from omegaconf import OmegaConf +from typing_extensions import NotRequired + +from nemo_rl.algorithms.dpo import DPOConfig +from nemo_rl.algorithms.grpo import GRPOConfig, GRPOLoggerConfig +from nemo_rl.algorithms.sft import SFTConfig +from nemo_rl.data import DataConfig +from nemo_rl.distributed.virtual_cluster import ClusterConfig +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.utils.checkpoint import CheckpointingConfig +from nemo_rl.utils.config import load_config_with_inheritance +from nemo_rl.utils.logger import LoggerConfig + + +def get_keys_from_typeddict(typed_dict_class: dict) -> Set[str]: + """Extract required keys from a TypedDict class, excluding NotRequired fields.""" + type_hints = get_type_hints(typed_dict_class, include_extras=True) + required_keys = set() + optional_keys = set() + + for key, annotation in type_hints.items(): + # Check if the field is marked as NotRequired + if hasattr(annotation, "__origin__") and (annotation.__origin__ is NotRequired): + optional_keys.add(key) + + ## check for Optional fields + elif ( + hasattr(annotation, "__origin__") + and annotation.__origin__ is Union + and type(None) in annotation.__args__ + ): + raise ValueError( + f"Please use the NotRequired annotation instead of Optional for key {key}" + ) + else: + required_keys.add(key) + + return required_keys, optional_keys + + +def validate_nested_config_section( + config_dict: Dict[str, Any], config_class: Type, section_path: str +) -> List[str]: + """Recursively validate a config section and its nested TypedDict fields.""" + errors = [] + type_hints = get_type_hints(config_class, include_extras=True) + + for key, annotation in type_hints.items(): + current_path = f"{section_path}.{key}" if section_path else key + + # Check if the field is marked as NotRequired + is_optional = 
hasattr(annotation, "__origin__") and ( + annotation.__origin__ is NotRequired + ) + + # If the key is not in the config and it's required, add an error + if key not in config_dict: + if not is_optional: + errors.append(f"Missing required key in {section_path}: {key}") + continue + + # Get the value from the config + value = config_dict[key] + + # If the annotation is a TypedDict (nested config), validate it recursively + if hasattr(annotation, "__annotations__") and isinstance(value, dict): + # This is a nested TypedDict, validate it recursively + nested_errors = validate_nested_config_section( + value, annotation, current_path + ) + errors.extend(nested_errors) + elif hasattr(annotation, "__origin__") and annotation.__origin__ is Optional: + # Handle Optional[TypedDict] case + if ( + value is not None + and hasattr(annotation.__args__[0], "__annotations__") + and isinstance(value, dict) + ): + nested_errors = validate_nested_config_section( + value, annotation.__args__[0], current_path + ) + errors.extend(nested_errors) + + # Check for extra keys (keys in config that are not in the TypedDict) + required_keys, optional_keys = get_keys_from_typeddict(config_class) + all_valid_keys = required_keys | optional_keys + + for key in config_dict.keys(): + if key not in all_valid_keys: + errors.append(f"Extra key in {section_path}: {key}") + + return errors + + +def validate_config_section( + config_dict: Dict[str, Any], config_class: dict, section_name: str +) -> List[str]: + """Validate a specific section of a config against its TypedDict class.""" + errors = [] + required_keys, optional_keys = get_keys_from_typeddict(config_class) + + if section_name not in config_dict: + errors.append(f"Missing required section: {section_name}") + return errors + + section_config = config_dict[section_name] + if not isinstance(section_config, dict): + errors.append(f"Section {section_name} must be a dictionary") + return errors + + # Use the new recursive validation function + 
nested_errors = validate_nested_config_section( + section_config, config_class, section_name + ) + errors.extend(nested_errors) + + return errors + + +def test_all_config_files_have_required_keys(): + """Test that all config files in examples/configs have all required keys for their respective sections.""" + if not OmegaConf.has_resolver("mul"): + OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + + absolute_path = os.path.abspath(__file__) + configs_dir = Path( + os.path.join(os.path.dirname(absolute_path), "../../examples/configs") + ) + + # Get all YAML config files + config_files = glob.glob(str(configs_dir / "**/*.yaml"), recursive=True) + + assert len(config_files) > 0, "No config files found" + + all_errors = [] + + for config_file in config_files: + print(f"\nValidating config file: {config_file}") + + try: + # Load the config file with inheritance + config = load_config_with_inheritance(config_file) + config_dict = OmegaConf.to_container(config, resolve=True) + + if config_dict is None: + all_errors.append(f"Config file {config_file} is empty or invalid") + continue + + # Validate each section against its corresponding config class + section_validations = [ + ("policy", PolicyConfig), + ("data", DataConfig), + ("cluster", ClusterConfig), + ("checkpointing", CheckpointingConfig), + ] + + # Add algorithm-specific validation + if "dpo" in config_dict: + section_validations.extend( + [("dpo", DPOConfig), ("logger", LoggerConfig)] + ) + elif "sft" in config_dict: + section_validations.extend( + [("sft", SFTConfig), ("logger", LoggerConfig)] + ) + elif "grpo" in config_dict: + section_validations.extend( + [("grpo", GRPOConfig), ("logger", GRPOLoggerConfig)] + ) + # GRPO also has a loss_fn section + if "loss_fn" in config_dict: + from nemo_rl.algorithms.loss_functions import ClippedPGLossConfig + + section_validations.append(("loss_fn", ClippedPGLossConfig)) + else: + warnings.warn( + f"Could not determine algorithm type for config {config_file}. 
Continuing..." + ) + continue + + # Validate each section + for section_name, config_class in section_validations: + errors = validate_config_section( + config_dict, config_class, section_name + ) + for error in errors: + all_errors.append(f"{config_file}: {error}") + + # Additional validation for GRPO configs that have an 'env' section + if "grpo" in config_dict and "env" in config_dict: + if not isinstance(config_dict["env"], dict): + all_errors.append( + f"{config_file}: env section must be a dictionary" + ) + + except Exception as e: + all_errors.append(f"Error processing {config_file}: {str(e)}") + + # If there are any errors, fail the test with detailed error messages + if all_errors: + error_message = "\n".join(all_errors) + pytest.fail(f"Config validation failed:\n{error_message}") + + print(f"\n✅ Successfully validated {len(config_files)} config files") From 908333044ef7f153b4fa8150d79e989aa81e2010 Mon Sep 17 00:00:00 2001 From: Xuehan Date: Wed, 16 Jul 2025 04:27:38 +0000 Subject: [PATCH 35/59] add adk fix renderer issue? 
update readme Signed-off-by: Jialei Chen --- README_Jialei_runs.md | 24 +++- examples/configs/grpo_adk_llama8b.yaml | 41 ++++++ examples/configs/grpo_adk_llama8b_debug.yaml | 43 +++++++ examples/run_grpo_unique_numbers.py | 12 +- .../environments/simulated_user/adk_utils.py | 119 ++++++++++++++++++ nemo_rl/environments/simulated_user/prompt.py | 30 +++++ .../simulated_user/unique_numbers.py | 72 +++++++++-- nemo_rl/experience/rollouts.py | 15 ++- pyproject.toml | 3 +- 9 files changed, 329 insertions(+), 30 deletions(-) create mode 100644 examples/configs/grpo_adk_llama8b.yaml create mode 100644 examples/configs/grpo_adk_llama8b_debug.yaml create mode 100644 nemo_rl/environments/simulated_user/adk_utils.py create mode 100644 nemo_rl/environments/simulated_user/prompt.py diff --git a/README_Jialei_runs.md b/README_Jialei_runs.md index 5ec6b4ef34..2452fb8583 100644 --- a/README_Jialei_runs.md +++ b/README_Jialei_runs.md @@ -21,4 +21,26 @@ Seems there is some chat render/format issue with gemma-1b, leading to low quali ![alt text](image.png) -see full wandb metrics [here](https://wandb.ai/jialeichen777-google/grpo-simulated-retrieval/reports/Dummy-retrival-task-for-llama-8b-and-gemma-1b--VmlldzoxMzQ0OTgyMw) \ No newline at end of file +see full wandb metrics [here](https://wandb.ai/jialeichen777-google/grpo-simulated-retrieval/reports/Dummy-retrival-task-for-llama-8b-and-gemma-1b--VmlldzoxMzQ0OTgyMw) + +- 07/15: enabled ADK + +example ADK script is here: nemo_rl/environments/simulated_user/adk_utils.py + +Intergrated into training: +``` +export GOOGLE_GENAI_USE_VERTEXAI=1 && export GOOGLE_API_KEY="xxxxxxxxxxxxxx" && export GOOGLE_CLOUD_PROJECT="xxxxxxx" && export GOOGLE_CLOUD_LOCATION="xxxxxx" +uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b.yaml +``` + +- 07/16: there are 3 issues found: + 1) Gemini endpoint may fail with 500 error sometimes, maybe related to rate limit. Need to understand more. 
+ 2) training is very slow, took ~ 20min for 64x32 samples completing rollout for 1 step on a 8GPU H100 node. + 3) chat format has issue. <- mostly fixed by chat_template and some hacks but not elegent. + + Also added a debug script for samller run + ``` + uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b_debug.yaml > llama3.log 2>&1 + ``` + + Also added logs for ADK agent convo history printout. Seems to work as expected. \ No newline at end of file diff --git a/examples/configs/grpo_adk_llama8b.yaml b/examples/configs/grpo_adk_llama8b.yaml new file mode 100644 index 0000000000..13252c3d52 --- /dev/null +++ b/examples/configs/grpo_adk_llama8b.yaml @@ -0,0 +1,41 @@ +# GRPO configuration for unique numbers environment +defaults: "grpo_math_8B.yaml" + +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 16 + max_rollout_turns: 20 + max_num_steps: 100 + val_at_start: false + +data: + add_system_prompt: false + +checkpointing: + enabled: false + checkpoint_dir: "results/grpo-adk" + metric_name: "val_reward" + higher_is_better: true + keep_top_k: 3 + save_period: 10 + +env: + unique_numbers: + cfg: + max_turns: 15 + min_length: 5 + max_length: 10 + max_integer: 15 + +logger: + wandb_enabled: True + wandb: + project: "grpo-simulated-adk" + name: "llama-8b-__NOW__" + +policy: + tokenizer: + chat_template: "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endfor %}<|start_header_id|>assistant<|end_header_id|>\n" + +cluster: + gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/grpo_adk_llama8b_debug.yaml b/examples/configs/grpo_adk_llama8b_debug.yaml new file mode 100644 index 0000000000..71d99c2c25 --- /dev/null +++ b/examples/configs/grpo_adk_llama8b_debug.yaml @@ -0,0 +1,43 @@ +# GRPO configuration for unique numbers environment +defaults: "grpo_math_8B.yaml" + +grpo: + 
num_prompts_per_step: 2 + num_generations_per_prompt: 4 + max_rollout_turns: 20 + max_num_steps: 100 + val_at_start: false + +data: + add_system_prompt: false + +checkpointing: + enabled: false + checkpoint_dir: "results/grpo-adk" + metric_name: "val_reward" + higher_is_better: true + keep_top_k: 3 + save_period: 10 + +policy: + train_global_batch_size: 8 + tokenizer: + chat_template: "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endfor %}<|start_header_id|>assistant<|end_header_id|>\n" + +env: + unique_numbers: + cfg: + max_turns: 15 + min_length: 5 + max_length: 10 + max_integer: 15 + +logger: + wandb_enabled: True + wandb: + project: "grpo-simulated-adk" + name: "llama-8b-debug-__NOW__" + + +cluster: + gpus_per_node: 8 \ No newline at end of file diff --git a/examples/run_grpo_unique_numbers.py b/examples/run_grpo_unique_numbers.py index 892d37142f..35e9ff0d5c 100644 --- a/examples/run_grpo_unique_numbers.py +++ b/examples/run_grpo_unique_numbers.py @@ -22,15 +22,9 @@ from nemo_rl.utils.config import load_config, parse_hydra_overrides from nemo_rl.utils.logger import get_next_experiment_dir -OmegaConf.register_new_resolver("mul", lambda a, b: a * b) +from nemo_rl.environments.simulated_user.prompt import starting_user_prompt -PROMPT = ( - "I will play a game with you. I have a list of integers in mind and can NOT tell you. " - "Your goal is to guess the count of UNIQUE numbers in my list. The only 2 things you can do is the following: " - "You can either ask me 'what is number k?' to get the number at position k in my list, " - "or answer 'there are m unique numbers' whenever you feel you want to make a guess." - "Please do not say anything else. You cannot ask me to provide the list of integers." 
-) +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) def parse_args(): @@ -42,7 +36,7 @@ def parse_args(): def generate_datum(tokenizer: AutoTokenizer, env_cfg: dict, task_name: str, idx: int, add_system_prompt: bool) -> DatumSpec: formatted_prompt = tokenizer.apply_chat_template( - [{"role": "user", "content": PROMPT}], + [{"role": "user", "content": starting_user_prompt}], tokenize=False, add_system_prompt=add_system_prompt, add_generation_prompt=True, diff --git a/nemo_rl/environments/simulated_user/adk_utils.py b/nemo_rl/environments/simulated_user/adk_utils.py new file mode 100644 index 0000000000..43f20f9f2d --- /dev/null +++ b/nemo_rl/environments/simulated_user/adk_utils.py @@ -0,0 +1,119 @@ +import asyncio + +from google.adk import Agent +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types + + +# Define the agents +def create_agent(instruction: str | None = None, name: str = "simulated_user", model: str = 'gemini-2.0-flash') -> Agent: + return Agent( + model=model, + name=name, + description="Agent", + instruction=instruction or "You are a helpful assistant that help people answer questions.", + generate_content_config=types.GenerateContentConfig( + safety_settings=[ + types.SafetySetting( + category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, + threshold=types.HarmBlockThreshold.OFF, + ), + ] + ), + ) + +def get_session_from_runner(runner: Runner, user_id: str): + app_session_map = runner.session_service.sessions + assert len(app_session_map) == 1, "Expected exactly one app in session_service" + user_sessions_map = next(iter(app_session_map.values())) + sessions = user_sessions_map[user_id] + assert len(sessions) == 1, "Expected exactly one user in app session" + return next(iter(sessions.values())) + +def get_agent_instruction_from_runner(runner: Runner): + return runner.agent.instruction + +def extract_conversation_history(runner: Runner, user_id: str, silence: 
bool = True): + session = get_session_from_runner(runner, user_id) + instruction = get_agent_instruction_from_runner(runner) + convo = [{"role": "instruction", "content":instruction}] + for event in session.events: + if event.content.parts and event.content.parts[0].text: + convo.append({"role": event.author, "content": event.content.parts[0].text}) + if not silence: + print(f"[{convo[-1]['role']}]: {convo[-1]['content']}") + return session.id, convo + + +async def run_prompt_async(runner: Runner, user_id: str, new_message: str, silence: bool = True): + content = types.Content(role='user', parts=[types.Part.from_text(text=new_message)]) + if not silence: + print('** User says:', new_message) + + session = get_session_from_runner(runner, user_id) + + async for event in runner.run_async( + user_id=session.user_id, + session_id=session.id, + new_message=content, + ): + if event.content.parts and event.content.parts[0].text: + if not silence: + print(f'** {event.author} says: {event.content.parts[0].text}') + return event.content.parts[0].text.strip() + + return "" + +async def setup_runner_async(agent: Agent, app_name: str, user_id: str): + runner = Runner( + agent=agent, + app_name=app_name, + session_service=InMemorySessionService() + ) + await runner.session_service.create_session(app_name=app_name, user_id=user_id) + return runner + + +async def main(): + + sample_id_1 = "sample_1" + sample_id_2 = "sample_2" + + # Set up simulated user runner + simulated_user_app_name = "su_app" + simulated_user_runner = Runner( + agent=create_agent(name="simulated_user"), + app_name=simulated_user_app_name, + session_service=InMemorySessionService() + ) + + await simulated_user_runner.session_service.create_session(app_name=simulated_user_app_name, user_id=sample_id_1) + await simulated_user_runner.session_service.create_session(app_name=simulated_user_app_name, user_id=sample_id_2) + + # setup grader runner + grader_app_name = "grader_app" + grader_instruction = "You are a 
helpful agent that can grade the correctness and coherent of a conversation. Please only give an integer as the score." + grader_runner = await setup_runner_async(agent=create_agent(name="grader", instruction=grader_instruction), app_name=grader_app_name, user_id=sample_id_1) + + # Simulated user interactions + await run_prompt_async(simulated_user_runner, sample_id_1, 'what is 2*3+5?', silence=False) + await run_prompt_async(simulated_user_runner, sample_id_2, 'what is 2*3-5?') + await run_prompt_async(simulated_user_runner, sample_id_1, 'Now add another 10.') + await run_prompt_async(simulated_user_runner, sample_id_2, 'Now add another 100.') + + # Print conversation + print("-" * 100) + _, convo1 = extract_conversation_history(simulated_user_runner, sample_id_1, silence=False) + print("-" * 100) + _, convo2 = extract_conversation_history(simulated_user_runner, sample_id_2, silence=False) + print("-" * 100) + + # Grade conversation + await run_prompt_async(grader_runner, sample_id_1, f'Grade the above conversation and give a score between 0-10. \n\n{convo1}', silence=False) + print("-" * 100) + print("DONE!") + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/nemo_rl/environments/simulated_user/prompt.py b/nemo_rl/environments/simulated_user/prompt.py new file mode 100644 index 0000000000..6fd742c929 --- /dev/null +++ b/nemo_rl/environments/simulated_user/prompt.py @@ -0,0 +1,30 @@ +starting_user_prompt = ( + "I will play a game with you. I have a list of integers in mind and can NOT tell you. " + "Your goal is to guess the count of UNIQUE numbers in my list. The only 2 things you can do is the following: " + "You can either ask me 'what is number k?' to get the number at position k in my list, " + "or answer 'there are m unique numbers' whenever you feel you want to make a guess." + "Please do not say anything else. You cannot ask me to provide the list of integers." 
+) + + +simulated_user_instruction = """ +You are a simulated user in a game where the assistant must figure out how many unique numbers you have. +You have a list of numbers (which may contain duplicates) that you will not reveal to the assistant. +The assistant can ask you questions of the form "What is number k?" where k is a 1-based index into your list of numbers. +You should respond with the number at that index. +The assistant can also make a guess by saying "There are m unique numbers" where m is their guess for the count of unique numbers. +If the assistant makes a correct guess, you will reward it. If the guess is incorrect, you will penalize it. + +Here is your list of numbers: {numbers}. +""".strip() + +grader_instruction = """ +Your are a strict grader to evaluate whether the assistant has properly guessed the count of unique numbers. +Here is your list of numbers: {numbers}. +You will see a conversation between the assistant and a simulated user who has this list of numbers. +You will need to evaluete in the end whether the assistant has made a correct guess of the count of unique numbers. +If the assistant made a correct guess, give it a score of 1. If the guess is incorrect, give it a score of 0. +If assistant made a correct guess but you feel the assistant has asked too many questions, please give a score between 0 and 1. +If the assistant never made a guess, give it a score of 0. +Please only output an integer score between 0 and 1, and nothing else. 
+""".strip() diff --git a/nemo_rl/environments/simulated_user/unique_numbers.py b/nemo_rl/environments/simulated_user/unique_numbers.py index b95bafd554..a355c98a48 100644 --- a/nemo_rl/environments/simulated_user/unique_numbers.py +++ b/nemo_rl/environments/simulated_user/unique_numbers.py @@ -2,8 +2,10 @@ from __future__ import annotations +import asyncio import random import re +import os from typing import Optional, TypedDict import ray @@ -12,7 +14,13 @@ from nemo_rl.data.interfaces import LLMMessageLogType from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.environments.interfaces import EnvironmentInterface, EnvironmentReturn +from nemo_rl.environments.simulated_user.adk_utils import run_prompt_async, setup_runner_async, create_agent, extract_conversation_history +from nemo_rl.environments.simulated_user.prompt import simulated_user_instruction, grader_instruction +# ────────────────────────── ADK imports ──────────────────────────────────────── +from google.genai import types +from google.adk.agents import Agent +from google.adk.runners import Runner as ADKRunner class UniqueNumbersConfig(TypedDict, total=False): """Configuration for :class:`UniqueNumbersEnv`.""" @@ -29,15 +37,28 @@ class UniqueNumbersMetadata(TypedDict): unique_count: int turn: int max_turns: int + simulated_user_runner: Optional[ADKRunner] + grader_runner: Optional[ADKRunner] -PENALTY_FOR_NO_GUESS = -0.1 +PENALTY_FOR_NO_GUESS = -0.2 PENALTY_FOR_INCORRECT_GUESS = 0.0 -PENALTY_FOR_EVERY_ASK = -0.01 -PENALTY_FOR_INCORRECT_FORMAT = -0.02 +PENALTY_FOR_EVERY_ASK = 0.0 +PENALTY_FOR_INCORRECT_FORMAT = 0.0 + +ADK_LOG_FOLDER = 'logs/adk' +os.makedirs(ADK_LOG_FOLDER, exist_ok=True) class _UniqueNumbersRunner: + query_re = re.compile(r"what is number (\d+)\??$", re.IGNORECASE) - guess_re = re.compile(r"there are (\d+) unique numbers", re.IGNORECASE) + guess_re = re.compile(r"there are (\d+) unique number", re.IGNORECASE) + + def _dump_adk_messages_to_file(self, runner: 
ADKRunner, user_id:str, log_name_suffix:str = "", dump_folder: str = ADK_LOG_FOLDER): + session_id, messages = extract_conversation_history(runner, user_id, silence=True) + file_name = f"{user_id}_{session_id}{log_name_suffix}.log" + with open(os.path.join(dump_folder, file_name), "a") as f: + for message in messages: + f.write(f"[{message['role']}]:|||{message['content']}|||\n") def process_turn( self, message_log: LLMMessageLogType, metadata: UniqueNumbersMetadata @@ -45,8 +66,19 @@ def process_turn( turn = metadata["turn"] max_turns = metadata["max_turns"] + + if "simulated_user_runner" not in metadata or metadata["simulated_user_runner"] is None: + instruction = simulated_user_instruction.replace("{numbers}", str(metadata["numbers"])) + simulated_user_agent = create_agent(name="simulated_user", model="gemini-2.0-flash", instruction=instruction) + metadata["simulated_user_runner"] = asyncio.run(setup_runner_async(simulated_user_agent, "simulated_user_app", "simulated_user")) + + if "grader_runner" not in metadata or metadata["grader_runner"] is None: + instruction = grader_instruction.replace("{numbers}", str(metadata["numbers"])) + grader_agent = create_agent(name="grader", model="gemini-2.0-flash", instruction=instruction) + metadata["grader_runner"] = asyncio.run(setup_runner_async(grader_agent, "grader_app", "grader")) + if turn >= max_turns: - # Out of turns + self._dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_maxturns") return {"role": "user", "content": ""}, PENALTY_FOR_NO_GUESS, True, None, None last_msg = "" @@ -57,25 +89,40 @@ def process_turn( # no last message from assistant, assuming done return {"role": "user", "content": ""}, PENALTY_FOR_NO_GUESS, True, None, None + # simulate user utterance via ADK query_match = self.query_re.search(last_msg) if query_match: - k = int(query_match.group(1)) - if 1 <= k <= len(metadata["numbers"]): - content = str(metadata["numbers"][k - 1]) - else: - content = f"Invalid index! 
There are {len(metadata['numbers'])} numbers." + simulated_content = asyncio.run(run_prompt_async(metadata["simulated_user_runner"], "simulated_user", last_msg, silence=True)) next_meta = { "numbers": metadata["numbers"], "unique_count": metadata["unique_count"], "turn": turn + 1, "max_turns": max_turns, + "simulated_user_runner": metadata.get("simulated_user_runner", None), + "grader_runner": metadata.get("grader_runner", None) } - return {"role": "user", "content": content}, PENALTY_FOR_EVERY_ASK, False, None, next_meta + return {"role": "user", "content": simulated_content}, PENALTY_FOR_EVERY_ASK, False, None, next_meta + # calculate reward if the assistant made a guess guess_match = self.guess_re.search(last_msg) if guess_match: m = int(guess_match.group(1)) reward = 1.0 if m == metadata["unique_count"] else PENALTY_FOR_INCORRECT_GUESS + + # grade the conversation via ADK grader + if metadata["grader_runner"] is not None: + convo_str = "\n".join([f"{msg['role']}: {msg['content']}" for msg in message_log]) + grading_prompt = f"Here is the converstation \n{convo_str}\nAnd please give the score between 0 and 1." 
+ grading_response = asyncio.run(run_prompt_async(metadata["grader_runner"], "grader", grading_prompt, silence=True)) + try: + grade = int(re.search(r"(\d+)", grading_response).group(1)) + reward = (reward + grade) / 2.0 + except Exception as e: + print(f"Failed to parse grade from grader response '{grading_response}': {e}") + + self._dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_stop") + self._dump_adk_messages_to_file(metadata["grader_runner"], "grader") + return {"role": "user", "content": ""}, reward, True, None, None # default response @@ -84,6 +131,8 @@ def process_turn( "unique_count": metadata["unique_count"], "turn": turn + 1, "max_turns": max_turns, + "simulated_user_runner": metadata.get("simulated_user_runner", None), + "grader_runner": metadata.get("grader_runner", None) } help_msg = "Please ask 'what is number k?' or say 'there are m unique numbers'." return {"role": "user", "content": help_msg}, PENALTY_FOR_INCORRECT_FORMAT, False, None, next_meta @@ -98,6 +147,7 @@ def __init__(self, cfg: Optional[UniqueNumbersConfig] = None): self.min_length = cfg.get("min_length", 3) self.max_length = cfg.get("max_length", 7) self.default_max_turns = cfg.get("max_turns", 10) + self.runner = _UniqueNumbersRunner() def step( diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index 963fa3f73a..9af8135883 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -378,6 +378,8 @@ def run_multi_turn_rollout( ) # generate_responses updates active_batch["message_log"] in-place + # print(f"[jialei][debug] - generation_input_data {turn=} - {generation_input_data}") + # print(f"[jialei][debug] - before {turn=} - {active_batch['message_log'][0]}") active_batch, generated_ids, gen_metrics = generate_responses( policy_generation, generation_input_data, @@ -386,6 +388,7 @@ def run_multi_turn_rollout( input_lengths=active_input_lengths, greedy=greedy, ) + # print(f"[jialei][debug] - after 
{turn=} - {active_batch['message_log'][0]}") # Record token usage - assistant for i, global_idx in enumerate(active_indices.tolist()): @@ -409,11 +412,9 @@ def run_multi_turn_rollout( env_role = env_output.observations[i]["role"].lower() if env_role in {"user", "assistant", "system"}: formatted_obs = tokenizer.apply_chat_template( - [{"role": env_role, "content": env_obs_content}], + [{"role": env_role, "content": env_obs_content.strip()}], tokenize=False, - add_special_tokens=False, - add_generation_prompt=False, - ).strip() + ).removeprefix("<|begin_of_text|>") tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] else: tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] @@ -669,11 +670,9 @@ async def run_sample_multi_turn_rollout( env_role = env_output.observations[0]["role"].lower() if env_role in {"user", "assistant", "system"}: formatted_obs = tokenizer.apply_chat_template( - [{"role": env_role, "content": env_obs_content}], + [{"role": env_role, "content": env_obs_content.strip()}], tokenize=False, - add_special_tokens=False, - add_generation_prompt=False, - ).strip() + ).removeprefix("<|begin_of_text|>").strip() tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] else: tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] diff --git a/pyproject.toml b/pyproject.toml index 62b78d6d4d..b9b0ad1191 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,8 @@ dependencies = [ "debugpy", "nvtx", "matplotlib", - "plotly" + "plotly", + "google-adk" ] [project.optional-dependencies] From 95cb615cf1554d0783c9215934a2f6e7ca2d64a3 Mon Sep 17 00:00:00 2001 From: Xuehan Date: Tue, 22 Jul 2025 19:37:42 +0000 Subject: [PATCH 36/59] rollout stable and faster Signed-off-by: Jialei Chen --- README_Jialei_runs.md | 15 +++- examples/configs/grpo_adk_llama8b.yaml | 1 + 
.../environments/simulated_user/adk_utils.py | 68 +++++++++++++------ .../simulated_user/unique_numbers.py | 45 ++++++++---- nemo_rl/experience/rollouts.py | 8 +-- 5 files changed, 97 insertions(+), 40 deletions(-) diff --git a/README_Jialei_runs.md b/README_Jialei_runs.md index 2452fb8583..ae364d974a 100644 --- a/README_Jialei_runs.md +++ b/README_Jialei_runs.md @@ -29,18 +29,27 @@ example ADK script is here: nemo_rl/environments/simulated_user/adk_utils.py Intergrated into training: ``` -export GOOGLE_GENAI_USE_VERTEXAI=1 && export GOOGLE_API_KEY="xxxxxxxxxxxxxx" && export GOOGLE_CLOUD_PROJECT="xxxxxxx" && export GOOGLE_CLOUD_LOCATION="xxxxxx" +export GOOGLE_GENAI_USE_VERTEXAI=1 && export GOOGLE_CLOUD_PROJECT="xxxxxxx" && export GOOGLE_CLOUD_LOCATION="xxxxxx" uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b.yaml ``` - 07/16: there are 3 issues found: 1) Gemini endpoint may fail with 500 error sometimes, maybe related to rate limit. Need to understand more. 2) training is very slow, took ~ 20min for 64x32 samples completing rollout for 1 step on a 8GPU H100 node. - 3) chat format has issue. <- mostly fixed by chat_template and some hacks but not elegent. + 3) chat format has issue. Also added a debug script for samller run ``` uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b_debug.yaml > llama3.log 2>&1 ``` - Also added logs for ADK agent convo history printout. Seems to work as expected. \ No newline at end of file + Also added logs for ADK agent convo history printout. Seems to work as expected. + + +- 07/22: fixed issues: + 1) Did many debugging, did not find the root cause (ruled out the rate limit issue, special token issue, and context length issue). Added retry logic when calling endpoint, seems the error rate is very low < 0.01% and 1 retry is enough. Adding logging to track the error and ignore the error for now. 
+ 2) add parallel enviroment.step() achives > 3x speedup (1625s -> 475s). Surprise that the current enviroment is single-threaded, so it is very slow. + 3) mostly fixed by chat_template and some hacks but not elegent. + +Now seems to work well. + diff --git a/examples/configs/grpo_adk_llama8b.yaml b/examples/configs/grpo_adk_llama8b.yaml index 13252c3d52..6274b18c07 100644 --- a/examples/configs/grpo_adk_llama8b.yaml +++ b/examples/configs/grpo_adk_llama8b.yaml @@ -34,6 +34,7 @@ logger: name: "llama-8b-__NOW__" policy: + train_global_batch_size: 512 tokenizer: chat_template: "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endfor %}<|start_header_id|>assistant<|end_header_id|>\n" diff --git a/nemo_rl/environments/simulated_user/adk_utils.py b/nemo_rl/environments/simulated_user/adk_utils.py index 43f20f9f2d..231fb816b6 100644 --- a/nemo_rl/environments/simulated_user/adk_utils.py +++ b/nemo_rl/environments/simulated_user/adk_utils.py @@ -1,9 +1,20 @@ import asyncio +import logging +import random from google.adk import Agent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types +from google.genai.errors import ServerError + + +# Initialize logging +logging.basicConfig( + format='[%(asctime)s] [%(levelname)s] %(message)s', + level=logging.WARNING, +) +logger = logging.getLogger(__name__) # Define the agents @@ -42,28 +53,47 @@ def extract_conversation_history(runner: Runner, user_id: str, silence: bool = T if event.content.parts and event.content.parts[0].text: convo.append({"role": event.author, "content": event.content.parts[0].text}) if not silence: - print(f"[{convo[-1]['role']}]: {convo[-1]['content']}") + logger.info(f"[{convo[-1]['role']}]: {convo[-1]['content']}") return session.id, convo -async def run_prompt_async(runner: Runner, user_id: str, new_message: str, 
silence: bool = True): +async def run_prompt_async(runner: Runner, user_id: str, new_message: str, silence: bool = True, + max_retries: int = 3, initial_delay: float = 2) -> str: + + new_message = new_message.strip() content = types.Content(role='user', parts=[types.Part.from_text(text=new_message)]) if not silence: - print('** User says:', new_message) + logger.info(f'** [User]->|||{new_message}|||') session = get_session_from_runner(runner, user_id) - async for event in runner.run_async( - user_id=session.user_id, - session_id=session.id, - new_message=content, - ): - if event.content.parts and event.content.parts[0].text: - if not silence: - print(f'** {event.author} says: {event.content.parts[0].text}') - return event.content.parts[0].text.strip() - - return "" + retries = 0 + delay = initial_delay + while retries < max_retries: + try: + async for event in runner.run_async( + user_id=session.user_id, + session_id=session.id, + new_message=content, + ): + if event.content.parts and event.content.parts[0].text: + if not silence: + logger.info(f'** [{event.author}]->|||{event.content.parts[0].text.strip()}|||') + return event.content.parts[0].text.strip() + else: + return "" + except ServerError as e: + retries += 1 + delay_with_jitter = delay + (random.random() * 2 - 1) * (delay * 0.5) + logger.error(f"Gemini API call (with message {new_message}) failed with ServerError {e} (attempt {retries}/{max_retries}). 
Retrying in {delay_with_jitter} seconds...") + await asyncio.sleep(delay_with_jitter) + delay *= 2 # Exponential backoff + except Exception as e: + logger.error(f"Gemini API call (with message {new_message}) failed with an unexpected error: {e}.") + return f"" + + logger.error(f"Gemini API call (with message {new_message}) reached maximum retries ({max_retries}) without success.") + return f"" async def setup_runner_async(agent: Agent, app_name: str, user_id: str): runner = Runner( @@ -103,16 +133,16 @@ async def main(): await run_prompt_async(simulated_user_runner, sample_id_2, 'Now add another 100.') # Print conversation - print("-" * 100) + logger.info("-" * 100) _, convo1 = extract_conversation_history(simulated_user_runner, sample_id_1, silence=False) - print("-" * 100) + logger.info("-" * 100) _, convo2 = extract_conversation_history(simulated_user_runner, sample_id_2, silence=False) - print("-" * 100) + logger.info("-" * 100) # Grade conversation await run_prompt_async(grader_runner, sample_id_1, f'Grade the above conversation and give a score between 0-10. 
\n\n{convo1}', silence=False) - print("-" * 100) - print("DONE!") + logger.info("-" * 100) + logger.info("DONE!") if __name__ == "__main__": diff --git a/nemo_rl/environments/simulated_user/unique_numbers.py b/nemo_rl/environments/simulated_user/unique_numbers.py index a355c98a48..df66bef62b 100644 --- a/nemo_rl/environments/simulated_user/unique_numbers.py +++ b/nemo_rl/environments/simulated_user/unique_numbers.py @@ -6,7 +6,9 @@ import random import re import os +from datetime import datetime from typing import Optional, TypedDict +from concurrent.futures import ThreadPoolExecutor import ray import torch @@ -22,6 +24,18 @@ from google.adk.agents import Agent from google.adk.runners import Runner as ADKRunner + +PENALTY_FOR_NO_GUESS = -0.2 +PENALTY_FOR_INCORRECT_GUESS = 0.0 +PENALTY_FOR_EVERY_ASK = 0.0 +PENALTY_FOR_INCORRECT_FORMAT = 0.0 + +ADK_LOG_FOLDER = None # 'logs/adk' or None to disable logging +if ADK_LOG_FOLDER is not None: + os.makedirs(ADK_LOG_FOLDER, exist_ok=True) + +GEMINI_CALL_MAX_WORKERS = 64 # if 1 then it will be single-threaded, otherwise it will use ThreadPoolExecutor + class UniqueNumbersConfig(TypedDict, total=False): """Configuration for :class:`UniqueNumbersEnv`.""" @@ -40,20 +54,14 @@ class UniqueNumbersMetadata(TypedDict): simulated_user_runner: Optional[ADKRunner] grader_runner: Optional[ADKRunner] -PENALTY_FOR_NO_GUESS = -0.2 -PENALTY_FOR_INCORRECT_GUESS = 0.0 -PENALTY_FOR_EVERY_ASK = 0.0 -PENALTY_FOR_INCORRECT_FORMAT = 0.0 - -ADK_LOG_FOLDER = 'logs/adk' -os.makedirs(ADK_LOG_FOLDER, exist_ok=True) - class _UniqueNumbersRunner: query_re = re.compile(r"what is number (\d+)\??$", re.IGNORECASE) guess_re = re.compile(r"there are (\d+) unique number", re.IGNORECASE) - def _dump_adk_messages_to_file(self, runner: ADKRunner, user_id:str, log_name_suffix:str = "", dump_folder: str = ADK_LOG_FOLDER): + def _maybe_dump_adk_messages_to_file(self, runner: ADKRunner, user_id:str, log_name_suffix:str = "", dump_folder: str = ADK_LOG_FOLDER): + if 
dump_folder is None: + return session_id, messages = extract_conversation_history(runner, user_id, silence=True) file_name = f"{user_id}_{session_id}{log_name_suffix}.log" with open(os.path.join(dump_folder, file_name), "a") as f: @@ -63,6 +71,7 @@ def _dump_adk_messages_to_file(self, runner: ADKRunner, user_id:str, log_name_su def process_turn( self, message_log: LLMMessageLogType, metadata: UniqueNumbersMetadata ) -> tuple[dict[str, str], float, bool, None, Optional[UniqueNumbersMetadata]]: + turn = metadata["turn"] max_turns = metadata["max_turns"] @@ -78,7 +87,7 @@ def process_turn( metadata["grader_runner"] = asyncio.run(setup_runner_async(grader_agent, "grader_app", "grader")) if turn >= max_turns: - self._dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_maxturns") + self._maybe_dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_maxturns") return {"role": "user", "content": ""}, PENALTY_FOR_NO_GUESS, True, None, None last_msg = "" @@ -120,8 +129,8 @@ def process_turn( except Exception as e: print(f"Failed to parse grade from grader response '{grading_response}': {e}") - self._dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_stop") - self._dump_adk_messages_to_file(metadata["grader_runner"], "grader") + self._maybe_dump_adk_messages_to_file(metadata["simulated_user_runner"], "simulated_user", "_stop") + self._maybe_dump_adk_messages_to_file(metadata["grader_runner"], "grader") return {"role": "user", "content": ""}, reward, True, None, None @@ -155,12 +164,20 @@ def step( message_log_batch: list[LLMMessageLogType], metadata_batch: list[Optional[UniqueNumbersMetadata]], ) -> EnvironmentReturn: - results = [] + + args = [] for log, meta in zip(message_log_batch, metadata_batch): assert meta is not None, "Metadata must not be None for UniqueNumbersEnv." assert meta["numbers"] is not None, "Numbers must not be None in metadata." 
assert meta["unique_count"] > 0, "Unique count must be greater than 0 in metadata." - results.append(self.runner.process_turn(log, meta)) + args.append((log, meta)) + + # Process either serially or in parallel + if GEMINI_CALL_MAX_WORKERS is None or GEMINI_CALL_MAX_WORKERS <= 1: + results = [self.runner.process_turn(log, meta) for log, meta in args] + else: + with ThreadPoolExecutor(max_workers=GEMINI_CALL_MAX_WORKERS) as executor: + results = list(executor.map(lambda p: self.runner.process_turn(*p), args)) observations, rewards, terminateds, stop_strings, next_metadata = [], [], [], [], [] for obs, rew, term, stops, meta in results: diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index 9af8135883..d68a5437b9 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -19,6 +19,7 @@ import copy from typing import Any +from datetime import datetime import ray import torch from transformers import PreTrainedTokenizerBase @@ -352,6 +353,9 @@ def run_multi_turn_rollout( if len(active_indices) == 0: break + if max_rollout_turns > 0: + print(f"▶ ▶ ▶ Running rollout turn {turn + 1} / {max_rollout_turns} with {len(active_indices)} active samples...") + active_samples_per_turn.append(len(active_indices)) # Convert LLMMessageLogType to FlatMessagesType for generation @@ -376,10 +380,7 @@ def run_multi_turn_rollout( "stop_strings": active_stop_strings, } ) - # generate_responses updates active_batch["message_log"] in-place - # print(f"[jialei][debug] - generation_input_data {turn=} - {generation_input_data}") - # print(f"[jialei][debug] - before {turn=} - {active_batch['message_log'][0]}") active_batch, generated_ids, gen_metrics = generate_responses( policy_generation, generation_input_data, @@ -388,7 +389,6 @@ def run_multi_turn_rollout( input_lengths=active_input_lengths, greedy=greedy, ) - # print(f"[jialei][debug] - after {turn=} - {active_batch['message_log'][0]}") # Record token usage - assistant for i, global_idx in 
enumerate(active_indices.tolist()): From 86505bbe3c373f207dd0edc357fa59ff484fe262 Mon Sep 17 00:00:00 2001 From: Xuehan Date: Tue, 22 Jul 2025 21:05:00 +0000 Subject: [PATCH 37/59] cleanup Signed-off-by: Xuehan Signed-off-by: Jialei Chen --- README_Jialei_runs.md | 55 ------------------- examples/configs/grpo_adk_llama8b_debug.yaml | 43 --------------- .../configs/grpo_unique_numbers_gemma1b.yaml | 43 --------------- .../configs/grpo_unique_numbers_llama8b.yaml | 38 ------------- ...rs.py => run_grpo_unique_numbers_w_adk.py} | 49 +++++++++++++---- .../test_unique_numbers_environment.py | 43 --------------- 6 files changed, 37 insertions(+), 234 deletions(-) delete mode 100644 README_Jialei_runs.md delete mode 100644 examples/configs/grpo_adk_llama8b_debug.yaml delete mode 100644 examples/configs/grpo_unique_numbers_gemma1b.yaml delete mode 100644 examples/configs/grpo_unique_numbers_llama8b.yaml rename examples/{run_grpo_unique_numbers.py => run_grpo_unique_numbers_w_adk.py} (81%) delete mode 100644 tests/unit/environments/test_unique_numbers_environment.py diff --git a/README_Jialei_runs.md b/README_Jialei_runs.md deleted file mode 100644 index ae364d974a..0000000000 --- a/README_Jialei_runs.md +++ /dev/null @@ -1,55 +0,0 @@ -## What I have run so far - -- 07/01: successfully run sliding_puzzle example: -``` -uv run python examples/run_grpo_sliding_puzzle.py logger.wandb_enabled=True grpo.val_at_start=True policy.train_micro_batch_size=1 cluster.gpus_per_node=8 -``` - -- 07/02: run with new dummy retreival simulator: -Task: env generate and hide a list of integers, LLM agent need to guess how many unique integers are in the list. -Allow LLM to ask for the list of integers by index one by one. 
- -``` -uv run python examples/run_grpo_unique_numbers.py -``` -or with llama-8b -``` -uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_unique_numbers_llama8b.yaml -``` - -Seems there is some chat render/format issue with gemma-1b, leading to low quality in the beginning. Llama-8b seems to be better? - -![alt text](image.png) - -see full wandb metrics [here](https://wandb.ai/jialeichen777-google/grpo-simulated-retrieval/reports/Dummy-retrival-task-for-llama-8b-and-gemma-1b--VmlldzoxMzQ0OTgyMw) - -- 07/15: enabled ADK - -example ADK script is here: nemo_rl/environments/simulated_user/adk_utils.py - -Intergrated into training: -``` -export GOOGLE_GENAI_USE_VERTEXAI=1 && export GOOGLE_CLOUD_PROJECT="xxxxxxx" && export GOOGLE_CLOUD_LOCATION="xxxxxx" -uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b.yaml -``` - -- 07/16: there are 3 issues found: - 1) Gemini endpoint may fail with 500 error sometimes, maybe related to rate limit. Need to understand more. - 2) training is very slow, took ~ 20min for 64x32 samples completing rollout for 1 step on a 8GPU H100 node. - 3) chat format has issue. - - Also added a debug script for samller run - ``` - uv run python examples/run_grpo_unique_numbers.py --config examples/configs/grpo_adk_llama8b_debug.yaml > llama3.log 2>&1 - ``` - - Also added logs for ADK agent convo history printout. Seems to work as expected. - - -- 07/22: fixed issues: - 1) Did many debugging, did not find the root cause (ruled out the rate limit issue, special token issue, and context length issue). Added retry logic when calling endpoint, seems the error rate is very low < 0.01% and 1 retry is enough. Adding logging to track the error and ignore the error for now. - 2) add parallel enviroment.step() achives > 3x speedup (1625s -> 475s). Surprise that the current enviroment is single-threaded, so it is very slow. - 3) mostly fixed by chat_template and some hacks but not elegent. 
- -Now seems to work well. - diff --git a/examples/configs/grpo_adk_llama8b_debug.yaml b/examples/configs/grpo_adk_llama8b_debug.yaml deleted file mode 100644 index 71d99c2c25..0000000000 --- a/examples/configs/grpo_adk_llama8b_debug.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# GRPO configuration for unique numbers environment -defaults: "grpo_math_8B.yaml" - -grpo: - num_prompts_per_step: 2 - num_generations_per_prompt: 4 - max_rollout_turns: 20 - max_num_steps: 100 - val_at_start: false - -data: - add_system_prompt: false - -checkpointing: - enabled: false - checkpoint_dir: "results/grpo-adk" - metric_name: "val_reward" - higher_is_better: true - keep_top_k: 3 - save_period: 10 - -policy: - train_global_batch_size: 8 - tokenizer: - chat_template: "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endfor %}<|start_header_id|>assistant<|end_header_id|>\n" - -env: - unique_numbers: - cfg: - max_turns: 15 - min_length: 5 - max_length: 10 - max_integer: 15 - -logger: - wandb_enabled: True - wandb: - project: "grpo-simulated-adk" - name: "llama-8b-debug-__NOW__" - - -cluster: - gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/grpo_unique_numbers_gemma1b.yaml b/examples/configs/grpo_unique_numbers_gemma1b.yaml deleted file mode 100644 index 638647590a..0000000000 --- a/examples/configs/grpo_unique_numbers_gemma1b.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# GRPO configuration for unique numbers environment -defaults: "grpo_math_1B.yaml" - -grpo: - num_prompts_per_step: 32 - num_generations_per_prompt: 16 - max_rollout_turns: 20 - max_num_steps: 100 - val_at_start: true - -data: - add_system_prompt: false - -checkpointing: - enabled: false - checkpoint_dir: "results/grpo-unique-numbers" - metric_name: "val_reward" - higher_is_better: true - keep_top_k: 3 - save_period: 10 - -env: - unique_numbers: - cfg: - max_turns: 15 - min_length: 5 - 
max_length: 10 - max_integer: 15 - -logger: - wandb_enabled: True - wandb: - project: "grpo-simulated-retrieval" - name: "gemma-1b-__NOW__" - -policy: - train_micro_batch_size: 1 - model_name: google/gemma-3-1b-it - tokenizer: - name: google/gemma-3-1b-it - -cluster: - gpus_per_node: 8 \ No newline at end of file diff --git a/examples/configs/grpo_unique_numbers_llama8b.yaml b/examples/configs/grpo_unique_numbers_llama8b.yaml deleted file mode 100644 index 8cdd63bcdd..0000000000 --- a/examples/configs/grpo_unique_numbers_llama8b.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# GRPO configuration for unique numbers environment -defaults: "grpo_math_8B.yaml" - -grpo: - num_prompts_per_step: 32 - num_generations_per_prompt: 16 - max_rollout_turns: 20 - max_num_steps: 100 - val_at_start: true - -data: - add_system_prompt: false - -checkpointing: - enabled: false - checkpoint_dir: "results/grpo-unique-numbers" - metric_name: "val_reward" - higher_is_better: true - keep_top_k: 3 - save_period: 10 - -env: - unique_numbers: - cfg: - max_turns: 15 - min_length: 5 - max_length: 10 - max_integer: 15 - -logger: - wandb_enabled: True - wandb: - project: "grpo-simulated-retrieval" - name: "llama-8b-__NOW__" - - -cluster: - gpus_per_node: 8 \ No newline at end of file diff --git a/examples/run_grpo_unique_numbers.py b/examples/run_grpo_unique_numbers_w_adk.py similarity index 81% rename from examples/run_grpo_unique_numbers.py rename to examples/run_grpo_unique_numbers_w_adk.py index 35e9ff0d5c..74d48736de 100644 --- a/examples/run_grpo_unique_numbers.py +++ b/examples/run_grpo_unique_numbers_w_adk.py @@ -14,6 +14,7 @@ from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.simulated_user.prompt import starting_user_prompt from nemo_rl.environments.simulated_user.unique_numbers import ( UniqueNumbersEnv, UniqueNumbersMetadata, @@ -22,19 +23,27 @@ 
from nemo_rl.utils.config import load_config, parse_hydra_overrides from nemo_rl.utils.logger import get_next_experiment_dir -from nemo_rl.environments.simulated_user.prompt import starting_user_prompt - OmegaConf.register_new_resolver("mul", lambda a, b: a * b) def parse_args(): - parser = argparse.ArgumentParser(description="Run GRPO with unique numbers simulator") - parser.add_argument("--config", type=str, default=None, help="Path to YAML config file") + parser = argparse.ArgumentParser( + description="Run GRPO with unique numbers simulator" + ) + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) args, overrides = parser.parse_known_args() return args, overrides -def generate_datum(tokenizer: AutoTokenizer, env_cfg: dict, task_name: str, idx: int, add_system_prompt: bool) -> DatumSpec: +def generate_datum( + tokenizer: AutoTokenizer, + env_cfg: dict, + task_name: str, + idx: int, + add_system_prompt: bool, +) -> DatumSpec: formatted_prompt = tokenizer.apply_chat_template( [{"role": "user", "content": starting_user_prompt}], tokenize=False, @@ -42,9 +51,13 @@ def generate_datum(tokenizer: AutoTokenizer, env_cfg: dict, task_name: str, idx: add_generation_prompt=True, add_special_tokens=False, ).strip() - token_ids = tokenizer(formatted_prompt, return_tensors="pt", add_special_tokens=False)["input_ids"][0] + token_ids = tokenizer( + formatted_prompt, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] - def _generate_numbers(min_length, max_length, max_integer, default_max_turns) -> UniqueNumbersMetadata: + def _generate_numbers( + min_length, max_length, max_integer, default_max_turns + ) -> UniqueNumbersMetadata: length = random.randint(min_length, max_length) numbers = [random.randint(0, max_integer) for _ in range(length)] return UniqueNumbersMetadata( @@ -123,27 +136,39 @@ def setup_data(tokenizer, env_cfg, task_name, length, val_length, add_system_pro def main(): args, overrides = parse_args() if not 
args.config: - args.config = os.path.join(os.path.dirname(__file__), "configs", "grpo_unique_numbers_gemma1b.yaml") + args.config = os.path.join( + os.path.dirname(__file__), "configs", "grpo_unique_numbers_gemma1b.yaml" + ) config = load_config(args.config) if overrides: config = parse_hydra_overrides(config, overrides) config: MasterConfig = OmegaConf.to_container(config, resolve=True) now_pst = datetime.utcnow() + timedelta(hours=-7) - config["logger"]["wandb"]["name"] = config["logger"]["wandb"]["name"].replace("__NOW__", now_pst.strftime("%m/%d-%H:%M")) + config["logger"]["wandb"]["name"] = config["logger"]["wandb"]["name"].replace( + "__NOW__", now_pst.strftime("%m/%d-%H:%M") + ) config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) if config["checkpointing"]["enabled"]: - print(f"\U0001F4CA Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}") + print( + f"\U0001f4ca Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}" + ) pprint.pprint(config) init_ray() tokenizer = get_tokenizer(config["policy"]["tokenizer"]) - config["policy"]["generation"] = configure_generation_config(config["policy"]["generation"], tokenizer) + config["policy"]["generation"] = configure_generation_config( + config["policy"]["generation"], tokenizer + ) - ds_length = config["grpo"]["num_prompts_per_step"] * config["grpo"]["num_generations_per_prompt"] * config["grpo"]["max_num_steps"] + ds_length = ( + config["grpo"]["num_prompts_per_step"] + * config["grpo"]["num_generations_per_prompt"] + * config["grpo"]["max_num_steps"] + ) dataset, val_dataset, task_to_env, val_task_to_env = setup_data( tokenizer=tokenizer, env_cfg=config["env"], diff --git a/tests/unit/environments/test_unique_numbers_environment.py b/tests/unit/environments/test_unique_numbers_environment.py deleted file mode 100644 index 2ee01166b4..0000000000 --- a/tests/unit/environments/test_unique_numbers_environment.py +++ /dev/null @@ -1,43 +0,0 @@ 
-import os -import time - -import pytest -import ray - -from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env -from nemo_rl.environments.simulated_user.unique_numbers import ( - UniqueNumbersEnv, - UniqueNumbersMetadata, -) - - -@pytest.fixture(scope="module") -def unique_env(): - env = UniqueNumbersEnv.options( - runtime_env={ - "py_executable": get_actor_python_env( - "nemo_rl.environments.simulated_user.unique_numbers.UniqueNumbersEnv" - ), - "env_vars": dict(os.environ), - } - ).remote(cfg={"max_turns": 5, "min_length": 3, "max_length": 3}) - yield env - env.shutdown.remote() - ray.kill(env) - time.sleep(0.1) - - -def test_query_and_reward(unique_env): - metadata = UniqueNumbersMetadata(numbers=[1, 2, 1], unique_count=2, turn=0, max_turns=5) - query_log = [[{"role": "assistant", "content": "what is number 2?"}]] - result = ray.get(unique_env.step.remote(query_log, [metadata])) - - assert result.observations[0]["content"] == "2" - assert result.rewards[0] == 0.0 - assert result.terminateds[0] is False - - guess_meta = UniqueNumbersMetadata(numbers=[1, 2, 1], unique_count=2, turn=3, max_turns=5) - guess_log = [[{"role": "assistant", "content": "there are 2 unique numbers"}]] - guess_result = ray.get(unique_env.step.remote(guess_log, [guess_meta])) - assert guess_result.terminateds[0] is True - assert guess_result.rewards[0] == 1.0 From 3c5aa359b2d1ce81525999611745958f66ba4e9c Mon Sep 17 00:00:00 2001 From: Andrew Schilling <85314306+aschilling-nv@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:41:59 -0500 Subject: [PATCH 38/59] docs: Add GitHub icon and link to top bar (#669) Signed-off-by: Andrew Schilling Signed-off-by: Jialei Chen --- docs/conf.py | 9 ++++++++- docs/project.json | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index e65eb15cbe..60bcecf32f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,7 @@ project = "NeMo-RL" copyright = "2025, NVIDIA Corporation" 
author = "NVIDIA Corporation" -release = "0.2.1" +release = "latest" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -75,6 +75,13 @@ html_theme = "nvidia_sphinx_theme" html_theme_options = { + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/NVIDIA-NeMo/RL", + "icon": "fa-brands fa-github", + } + ], "switcher": { "json_url": "../versions1.json", "version_match": release, diff --git a/docs/project.json b/docs/project.json index d47f15baf7..348654f298 100644 --- a/docs/project.json +++ b/docs/project.json @@ -1 +1 @@ -{"name": "nemo-rl", "version": "0.2.1"} \ No newline at end of file +{"name": "nemo-rl", "version": "latest"} \ No newline at end of file From 171ef502801da590ef4bd8d9366c6b82ef1d24d5 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 15 Jul 2025 13:46:43 -0700 Subject: [PATCH 39/59] fix: Tie weights after set_model_state_dict if required (#666) Signed-off-by: Parth Chadha Signed-off-by: Jialei Chen --- nemo_rl/models/dtensor/parallelize.py | 7 ------- nemo_rl/models/policy/dtensor_policy_worker.py | 15 +++++++++++++++ tests/unit/models/policy/test_dtensor_worker.py | 13 ++++++++----- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/nemo_rl/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py index 370624a163..8004d1fbff 100644 --- a/nemo_rl/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -146,10 +146,6 @@ def _parallelize_llama( sequence_parallel: bool = False, ): """Parallelizes a LlamaForCausalLM model across data and tensor parallel dimensions.""" - assert not model.config.tie_word_embeddings, ( - "Tie word embeddings not supported when TP is enabled" - ) - base_model_tp_plan = { "model.embed_tokens": RowwiseParallel(input_layouts=Replicate()), "model.layers.*.self_attn.q_proj": ColwiseParallel(), @@ -206,9 +202,6 @@ def 
_prepare_input_fn(sequence_sharding, mod, inputs, device_mesh): f"expecting input of {mod} to be a torch.Tensor or DTensor, but got {input_tensor}" ) - assert not model.config.tie_word_embeddings, ( - "Tie word embeddings not supported when TP is enabled" - ) if sequence_parallel: base_model_tp_plan = { "lm_head": ColwiseParallel( diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index df8b4e734f..6be85d9e0d 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -267,6 +267,21 @@ def __init__( ), ) + # Handle tied word embeddings after loading the state dict + # We need to actually tie the parameters at the model level + is_tied_lm_head = getattr( + getattr(self.model, "config", {}), "tie_word_embeddings", False + ) + if is_tied_lm_head: + embed_tokens_weight = None + for name, param in self.model.named_parameters(): + if "embed_tokens" in name and name.endswith(".weight"): + embed_tokens_weight = param + break + + if embed_tokens_weight is not None: + self.model.lm_head.weight = embed_tokens_weight + # Manually broadcast buffers for _, buf in self.model.named_buffers(): torch.distributed.broadcast(to_local_if_dtensor(buf), src=0) diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index 91bf140641..e208873353 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -487,7 +487,10 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_clu from torch.distributed.tensor.parallel import ColwiseParallel from torch.distributed.tensor.placement_types import Replicate - custom_parallel_plan = {"lm_head": ColwiseParallel(output_layouts=Replicate())} + custom_parallel_plan = { + "lm_head": ColwiseParallel(output_layouts=Replicate()), + "model.embed_tokens": ColwiseParallel(output_layouts=Replicate()), + } config = 
create_test_config( model_name=TEST_ASSETS.TINY_LLAMA_TIED_MODEL_PATH, tp=2, @@ -510,11 +513,11 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_clu state_dict = ray.get(policy.worker_group.workers[0].return_state_dict.remote()) total_shape = state_dict["lm_head.weight"].shape sharded_shape = state_dict["lm_head.weight"].to_local().shape - assert total_shape[0] == sharded_shape[0] * 2, ( - "lm_head.weight should be sharded across 2 GPUs" + assert total_shape[0] == sharded_shape[0], ( + "lm_head.weight should have the same number of rows" ) - assert total_shape[1] == sharded_shape[1], ( - "lm_head.weight should have the same number of columns" + assert total_shape[1] == sharded_shape[1] * 2, ( + "lm_head.weight should be sharded across 2 GPUs" ) # Clean up From 905d089e51bcb5ebe348caac935df157b03218f1 Mon Sep 17 00:00:00 2001 From: Zhiyu Li Date: Tue, 15 Jul 2025 16:19:03 -0700 Subject: [PATCH 40/59] feat: optimize refit by reducing set of IPC handles sent to each device (#634) Signed-off-by: Zhiyu Li Signed-off-by: Yuki Huang Co-authored-by: yuki <48991475+yuki-666@users.noreply.github.com> Signed-off-by: Jialei Chen --- nemo_rl/models/generation/vllm.py | 64 +++++++++++++++++++++-- nemo_rl/models/generation/vllm_backend.py | 28 ++++++---- 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index bb689ac7f4..a99022e30f 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -347,6 +347,16 @@ def _patch_vllm_init_workers_ray(): else: self.llm = vllm.LLM(**llm_kwargs) + # will be initialized in post_init + # used in update_weights_from_ipc_handles + self.vllm_device_ids = None + + def post_init(self): + self.vllm_device_ids = self.report_device_id() + + async def post_init_async(self): + self.vllm_device_ids = await self.report_device_id_async() + def init_collective( self, rank_prefix: int, ip: str, port: int, world_size: 
int ) -> None: @@ -1030,9 +1040,37 @@ def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: "update_weights_from_ipc_handles cannot be used with async_engine=True. Use update_weights_from_ipc_handles_async instead." ) - result_or_coro = self.llm.collective_rpc( - "update_weights_from_ipc_handles", args=(ipc_handles,) - ) + if self.tensor_parallel_size == 1: + # UniProcExecutor + assert len(self.vllm_device_ids) == 1 + result_or_coro = self.llm.collective_rpc( + "update_weights_from_local_ipc_handles", + args=(ipc_handles[self.vllm_device_ids[0]],), + ) + else: + """ + DO NOT USE VLLM's collective_rpc: This code causes duplicate IPC data transfer across Ray workers, + leading to unnecessary network serialization overhead and potential performance degradation. + + result_or_coro = self.llm.collective_rpc( + "update_weights_from_global_ipc_handles", args=(ipc_handles,) + ) + """ + ray_worker_outputs = [] + # MultiProcExecutor + for worker, device_id in zip( + self.llm.llm_engine.model_executor.workers, self.vllm_device_ids + ): + ray_worker_outputs.append( + worker.execute_method.remote( + "update_weights_from_local_ipc_handles", + ipc_handles[device_id], + ) + ) + + # Gather the results + result_or_coro = ray.get(ray_worker_outputs) + worker_result = result_or_coro[0] if not worker_result: @@ -1069,8 +1107,9 @@ async def update_weights_from_ipc_handles_async( "update_weights_from_ipc_handles_async can only be used with async_engine=True. Use update_weights_from_ipc_handles instead." 
) + # TODO: switch to update_weights_from_local_ipc_handles for better performance once collectively report_device_id is supported in asyncLLM initialization result_or_coro = await self.llm.collective_rpc( - "update_weights_from_ipc_handles", args=(ipc_handles,) + "update_weights_from_global_ipc_handles", args=(ipc_handles,) ) if asyncio.iscoroutine(result_or_coro): @@ -1356,6 +1395,10 @@ def __init__( env_vars=env_vars, ) + # Call some collective rpc functions in VllmGenerationWorker when initializing the vLLM engine + # This is necessary for async engine to work + self._post_init() + # Number of data parallel groups is the number of tied worker groups self.dp_size = self.worker_group.dp_size @@ -1496,6 +1539,19 @@ def _report_device_id(self) -> list[list[str]]: results = ray.get(futures) return results + def _post_init(self): + # Choose the appropriate method based on async_engine setting + method_name = ( + "post_init_async" if self.cfg["vllm_cfg"]["async_engine"] else "post_init" + ) + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"] + ) + # Wait for all futures to complete + results = ray.get(futures) + return results + def init_collective( self, ip: str, port: int, world_size: int ) -> list[ray.ObjectRef]: diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index 01dc68146b..3d6ed0253c 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -51,24 +51,34 @@ def report_device_id(self) -> str: return get_device_uuid(self.device.index) - def update_weights_from_ipc_handles(self, ipc_handles): - """Update weights from IPC handles. + def update_weights_from_global_ipc_handles(self, global_device_ipc_handles): + """Update weights from global IPC handles. 
Args: - ipc_handles (dict): Dictionary mapping device UUIDs to parameter IPC handles. + global_device_ipc_handles (dict): Dictionary mapping device UUIDs to parameter IPC handles. + + Returns: + bool: True if weights were successfully updated. + """ + device_uuid = self.report_device_id() + local_device_ipc_handles = global_device_ipc_handles[device_uuid] + return self.update_weights_from_local_ipc_handles(local_device_ipc_handles) + + def update_weights_from_local_ipc_handles(self, local_device_ipc_handles): + """Update weights from local IPC handles. + + Args: + local_device_ipc_handles (dict): parameter IPC handles for local device. Returns: bool: True if weights were successfully updated. """ try: - # Get handles for this device - device_uuid = self.report_device_id() - handles = ipc_handles[device_uuid] - is_tensor_packed = handles[0] + is_tensor_packed = local_device_ipc_handles[0] if is_tensor_packed: - _, all_handles, tensor_metadata = handles + _, all_handles, tensor_metadata = local_device_ipc_handles else: - _, name_and_handle_list = handles + _, name_and_handle_list = local_device_ipc_handles device_id = self.device.index weights = [] From d0424cf0e269cacb538ac0f6b5a3130bc1cbea7c Mon Sep 17 00:00:00 2001 From: Matvei Novikov Date: Wed, 16 Jul 2025 09:39:53 +0400 Subject: [PATCH 41/59] fix: adjust temperature scaling logic based on engine version (#660) Signed-off-by: jubick1337 Signed-off-by: Jialei Chen --- nemo_rl/models/policy/dtensor_policy_worker.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 6be85d9e0d..d93385ec5f 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -578,7 +578,12 @@ def train( "generation" in self.cfg and self.cfg["generation"] is not None ): - logits.div_(self.cfg["generation"]["temperature"]) + # The V1 engine returns raw logits before 
temperature scaling. + # The V0 engine (when VLLM_USE_V1 is not '1') returns scaled logits. + # Therefore, we only divide if we are NOT using the V1 engine. + use_v1_engine = os.environ.get("VLLM_USE_V1") == "1" + if not use_v1_engine: + logits.div_(self.cfg["generation"]["temperature"]) if self.cp_size > 1: seq_index_dtensor = ( From 07d7d92d1a3fa6736c311ec04262420fc4916856 Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 15 Jul 2025 23:42:19 -0700 Subject: [PATCH 42/59] feat: introduce megatron checkpoint dir precedence (#665) Signed-off-by: Terry Kong Signed-off-by: Terry Kong Co-authored-by: jgerh <163925524+jgerh@users.noreply.github.com> Signed-off-by: Jialei Chen --- docs/design-docs/training-backends.md | 46 ++++++- .../models/policy/megatron_policy_worker.py | 12 +- nemo_rl/models/policy/utils.py | 26 ++++ tests/unit/models/policy/test_utils.py | 118 +++++++++++++++++- 4 files changed, 193 insertions(+), 9 deletions(-) diff --git a/docs/design-docs/training-backends.md b/docs/design-docs/training-backends.md index 0448284971..27bdce71e2 100644 --- a/docs/design-docs/training-backends.md +++ b/docs/design-docs/training-backends.md @@ -4,8 +4,19 @@ NeMo RL supports multiple training backends to accommodate different model sizes ## Available Backends -- **DTensor (FSDP2)** - PyTorch's next-generation distributed training with improved memory efficiency -- **Megatron** - NVIDIA's high-performance training framework for scaling to large models (>100B parameters) +- **DTensor (FSDP2)** - PyTorch's next-generation distributed training with improved memory efficiency. +- **Megatron** - NVIDIA's high-performance training framework for scaling to large models (>100B parameters). + +## Supported Input Checkpoint Format + +At this time, NeMo RL only supports Hugging Face checkpoints as inputs to the training scripts. This applies to both +the `DTensor` backend and the `Megatron` backend. 
+ +* `DTensor` uses the Hugging Face checkpoint both to initialize the training backend and to configure `vllm`, ensuring the model implementations match exactly. This is crucial for correctness. +* `Megatron` also uses the Hugging Face checkpoint to configure `vllm`, and performs a one-time conversion to a Megatron-format checkpoint to initialize the training backend. + +If you would like to see direct support for Megatron checkpoints, please share your use case on +https://github.com/NVIDIA-NeMo/RL/issues/671. ## Backend Selection @@ -33,3 +44,34 @@ To enable DTensor (FSDP2) training: ## Configuration Examples For comprehensive examples of each algorithm and backend, see the [examples/configs/recipes/llm](https://github.com/NVIDIA-NeMo/RL/tree/main/examples/configs/recipes/llm) folder. This directory contains ready-to-use configurations for various supported combinations. + +## Megatron Configuration + +The Megatron backend requires a checkpoint directory for storing converted Hugging Face model weights in Megatron format. This directory must be accessible from all nodes in your distributed training setup. + +### Environment Variable Priority (Highest to Lowest) ### + +1. **`NRL_MEGATRON_CHECKPOINT_DIR`** - The custom checkpoint directory path. +2. [RECOMMENDED] **`HF_HOME/nemo_rl`** - Uses the Hugging Face cache directory, if available. +3. **`~/.cache/huggingface/nemo_rl`** - The default fallback location. 
+ +### Configuration Examples ### + +```bash +# Option 1: Set custom checkpoint directory +export NRL_MEGATRON_CHECKPOINT_DIR="/shared/nfs/checkpoints/megatron" + +# Option 2: Use HuggingFace home directory (recommended for shared setups) +export HF_HOME="/shared/nfs/huggingface" +# This will use /shared/nfs/huggingface/nemo_rl + +# Option 3: Use default (no environment variables needed) +# Uses ~/.cache/huggingface/nemo_rl +``` + +### Best Practices ### + +- **Mount in checkpoint directory**: If you are using Docker, make sure the Megatron checkpoint path is covered by `-v`/`--mount`. Similarly, if you are using SLURM+pyxis, ensure `--container-mounts` includes this path. +- **Use shared storage**: Ensure the checkpoint directory is accessible from all nodes (e.g., NFS, shared filesystem). +- **Prefer HF_HOME**: If you already have `HF_HOME` mounted across nodes, this reduces the number of environment variables to manage. +- **Sufficient space**: Ensure adequate disk space for the converted model checkpoints. 
\ No newline at end of file diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 691e1ce5b3..7daa6de019 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -116,6 +116,7 @@ from nemo_rl.models.policy.utils import ( configure_expandable_segments, get_gpu_info, + get_megatron_checkpoint_dir, get_runtime_env_for_policy_worker, ) @@ -356,7 +357,6 @@ def __init__( *, worker_sharding_annotations: NamedSharding, pre_init_communication_queue: Queue, - megatron_checkpoint_home: Optional[str] = None, **kwargs: Any, ): self.cfg = config @@ -378,10 +378,7 @@ def __init__( if os.path.exists(hf_model_name): hf_model_subdir = f"model_{hf_model_subdir.replace('/', '_')}" - if megatron_checkpoint_home is not None: - pretrained_path = f"{megatron_checkpoint_home}/{hf_model_subdir}" - else: - pretrained_path = f"/opt/checkpoints/tron/{hf_model_subdir}" + pretrained_path = f"{get_megatron_checkpoint_dir()}/{hf_model_subdir}" pt_checkpoint_exists = os.path.exists(pretrained_path) and os.path.exists( os.path.join(pretrained_path, "iter_0000000") ) @@ -435,6 +432,11 @@ def __init__( if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token + if not os.path.exists(pretrained_run_config): + raise FileNotFoundError( + f"Pretrained run config not found at {pretrained_run_config} on rank={get_rank_safe()}. This usually means that the one-time HF->mcore conversion on rank=0 saved to a directory not being mounted on this node. 
Please check " + ) + cfg_from_pretrained = ConfigContainer.from_yaml(pretrained_run_config) model_cfg = cfg_from_pretrained.model_config cfg_from_pretrained.logger_config = LoggerConfig() diff --git a/nemo_rl/models/policy/utils.py b/nemo_rl/models/policy/utils.py index 7e5e1b92fe..359e5bf4a3 100644 --- a/nemo_rl/models/policy/utils.py +++ b/nemo_rl/models/policy/utils.py @@ -185,3 +185,29 @@ def get_runtime_env_for_policy_worker(policy_worker_name: str) -> dict[str, Any] } return runtime_env + + +def get_megatron_checkpoint_dir() -> str: + """Gets the default megatron checkpoint directory for initial HF -> Mcore conversion. + + Megatron initial checkpoint should be saved to a path available on all nodes. The directory used will take this order of precendence: + 1. $NRL_MEGATRON_CHECKPOINT_DIR (if set) + 2. $HF_HOME/nemo_rl (if HF_HOME is set) + 3. ~/.cache/huggingface/nemo_rl + + HF_HOME is preferred since many users will also have that path mounted and it means one less directory + to mount into your runtime environment. + """ + nrl_checkpoint_dir = os.environ.get("NRL_MEGATRON_CHECKPOINT_DIR") + if nrl_checkpoint_dir is not None and nrl_checkpoint_dir.strip(): + checkpoint_dir = nrl_checkpoint_dir + else: + hf_home = os.environ.get("HF_HOME") + if hf_home is not None and hf_home.strip(): + checkpoint_dir = os.path.join(hf_home, "nemo_rl") + else: + checkpoint_dir = os.path.join( + os.path.expanduser("~"), ".cache", "huggingface", "nemo_rl" + ) + print(f"Using default megatron checkpoint dir: {checkpoint_dir}") + return checkpoint_dir diff --git a/tests/unit/models/policy/test_utils.py b/tests/unit/models/policy/test_utils.py index 98faa01667..5712985cd3 100644 --- a/tests/unit/models/policy/test_utils.py +++ b/tests/unit/models/policy/test_utils.py @@ -13,10 +13,13 @@ # limitations under the License. 
import os -import unittest +import unittest.mock from unittest.mock import MagicMock, patch -from nemo_rl.models.policy.utils import configure_expandable_segments +from nemo_rl.models.policy.utils import ( + configure_expandable_segments, + get_megatron_checkpoint_dir, +) class TestConfigureExpandableSegments(unittest.TestCase): @@ -131,3 +134,114 @@ def test_ampere_gpu_no_existing_config(self, mock_get_device_properties): # Verify the environment variable was not set self.assertNotIn("PYTORCH_CUDA_ALLOC_CONF", os.environ) + + @patch("torch.cuda.get_device_properties") + def test_ampere_gpu_with_expandable_segments_true_raises_error( + self, mock_get_device_properties + ): + """Test Ampere GPU with expandable_segments:True in config raises RuntimeError.""" + # Mock GPU properties for Ampere architecture + mock_device_properties = MagicMock() + mock_device_properties.major = 8 # Ampere + mock_get_device_properties.return_value = mock_device_properties + + # Set config with expandable_segments:True + os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + + # Call the function and expect RuntimeError + with self.assertRaises(RuntimeError) as context: + configure_expandable_segments() + + # Verify the error message + self.assertIn("expandable_segments is enabled", str(context.exception)) + self.assertIn( + "not supported on architectures older than Hopper", str(context.exception) + ) + + +class TestGetMegatronCheckpointDir: + """Test cases for the get_megatron_checkpoint_dir function.""" + + def test_nrl_megatron_checkpoint_dir_takes_precedence(self): + """Test that NRL_MEGATRON_CHECKPOINT_DIR environment variable takes highest precedence.""" + expected_dir = "/custom/nrl/checkpoint/path" + + with unittest.mock.patch.dict( + os.environ, + { + "NRL_MEGATRON_CHECKPOINT_DIR": expected_dir, + "HF_HOME": "/some/hf/home", + "HOME": "/some/home", + }, + ): + result = get_megatron_checkpoint_dir() + assert result == expected_dir + + def 
test_hf_home_fallback_when_nrl_not_set(self): + """Test that HF_HOME/nemo_rl is used when NRL_MEGATRON_CHECKPOINT_DIR is not set.""" + hf_home = "/path/to/hf/home" + expected_dir = os.path.join(hf_home, "nemo_rl") + + env_vars = {"HF_HOME": hf_home, "HOME": "/some/home"} + # Remove NRL_MEGATRON_CHECKPOINT_DIR if it exists + env_vars.pop("NRL_MEGATRON_CHECKPOINT_DIR", None) + + with unittest.mock.patch.dict(os.environ, env_vars, clear=True): + result = get_megatron_checkpoint_dir() + assert result == expected_dir + + def test_default_fallback_when_no_env_vars_set(self): + """Test that ~/.cache/huggingface/nemo_rl is used when no environment variables are set.""" + home_dir = "/home/testuser" + expected_dir = os.path.join(home_dir, ".cache", "huggingface", "nemo_rl") + + with unittest.mock.patch.dict(os.environ, {"HOME": home_dir}, clear=True): + with unittest.mock.patch("os.path.expanduser") as mock_expanduser: + mock_expanduser.return_value = home_dir + result = get_megatron_checkpoint_dir() + assert result == expected_dir + mock_expanduser.assert_called_once_with("~") + + def test_nrl_checkpoint_dir_empty_string_treated_as_unset(self): + """Test that an empty NRL_MEGATRON_CHECKPOINT_DIR is treated as unset.""" + hf_home = "/path/to/hf/home" + expected_dir = os.path.join(hf_home, "nemo_rl") + + with unittest.mock.patch.dict( + os.environ, + { + "NRL_MEGATRON_CHECKPOINT_DIR": "", + "HF_HOME": hf_home, + "HOME": "/some/home", + }, + ): + result = get_megatron_checkpoint_dir() + assert result == expected_dir + + def test_hf_home_empty_string_treated_as_unset(self): + """Test that an empty HF_HOME is treated as unset.""" + home_dir = "/home/testuser" + expected_dir = os.path.join(home_dir, ".cache", "huggingface", "nemo_rl") + + with unittest.mock.patch.dict( + os.environ, {"HF_HOME": "", "HOME": home_dir}, clear=True + ): + with unittest.mock.patch("os.path.expanduser") as mock_expanduser: + mock_expanduser.return_value = home_dir + result = 
get_megatron_checkpoint_dir() + assert result == expected_dir + + def test_function_prints_selected_directory(self, capsys): + """Test that the function prints the selected directory.""" + expected_dir = "/custom/checkpoint/dir" + + with unittest.mock.patch.dict( + os.environ, {"NRL_MEGATRON_CHECKPOINT_DIR": expected_dir} + ): + result = get_megatron_checkpoint_dir() + + captured = capsys.readouterr() + assert ( + f"Using default megatron checkpoint dir: {expected_dir}" in captured.out + ) + assert result == expected_dir From 00d74b76c06e0c2ec8a19b52fa0a36854f37f8b7 Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Thu, 17 Jul 2025 02:10:28 +0800 Subject: [PATCH 43/59] feat: optimize refit by preparing refit info ahead of time (#638) Signed-off-by: Yuki Huang Signed-off-by: Jialei Chen --- nemo_rl/algorithms/grpo.py | 14 +- nemo_rl/models/generation/interfaces.py | 10 +- nemo_rl/models/generation/vllm.py | 44 ++- nemo_rl/models/generation/vllm_backend.py | 43 ++- nemo_rl/models/megatron/refit_utils.py | 177 +++++++++++- .../models/policy/dtensor_policy_worker.py | 75 +++-- nemo_rl/models/policy/interfaces.py | 8 +- nemo_rl/models/policy/lm_policy.py | 24 +- .../models/policy/megatron_policy_worker.py | 269 +++++------------- .../models/generation/test_vllm_generation.py | 114 +++++++- .../models/policy/test_megatron_worker.py | 7 + 11 files changed, 498 insertions(+), 287 deletions(-) diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index d33e503636..530ad4e8f9 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -347,6 +347,10 @@ def setup( # wait for all futures to complete ray.get(futures_train + futures_inference) + # prepare refit info + state_dict_info = policy.prepare_refit_info() + policy_generation.prepare_refit_info(state_dict_info) + loss_fn = ClippedPGLossFn(loss_config) print("\n" + "=" * 60) @@ -422,17 +426,15 @@ def refit_policy_generation( # do update for keys in 
grouped_param_keys: ipc_handles = policy.get_weights_ipc_handles(keys) - update_success = policy_generation.update_weights(ipc_handles) + update_success = policy_generation.update_weights_from_ipc_handles( + ipc_handles + ) if not update_success: break else: - # prepare info for update weights - state_dict_info = policy.prepare_info_for_collective() # update weights through nccl futures_train = policy.broadcast_weights_for_collective() - futures_inference = policy_generation.update_weights_from_collective( - state_dict_info - ) + futures_inference = policy_generation.update_weights_from_collective() # wait for all futures to complete ray.get(futures_train) results = ray.get(futures_inference) diff --git a/nemo_rl/models/generation/interfaces.py b/nemo_rl/models/generation/interfaces.py index 9db0c357fa..665473cc03 100644 --- a/nemo_rl/models/generation/interfaces.py +++ b/nemo_rl/models/generation/interfaces.py @@ -228,12 +228,14 @@ def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool: def finish_generation(self, *args: Any, **kwargs: Any) -> bool: pass - def update_weights(self, ipc_handles: dict[str, Any]) -> bool: + def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: + """Prepare the info for refit.""" + raise NotImplementedError + + def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: """Update the model weights from the given IPC handles.""" raise NotImplementedError - def update_weights_from_collective( - self, info: dict[str, Any] - ) -> list[ray.ObjectRef]: + def update_weights_from_collective(self) -> list[ray.ObjectRef]: """Update the model weights from collective communication.""" raise NotImplementedError diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py index a99022e30f..7a382c0bda 100644 --- a/nemo_rl/models/generation/vllm.py +++ b/nemo_rl/models/generation/vllm.py @@ -1021,6 +1021,14 @@ async def report_device_id_async(self) -> list[str]: return 
cast(list[str], list_of_worker_results) + def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: + """Prepare the info for refit.""" + self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,)) + + async def prepare_refit_info_async(self, state_dict_info: dict[str, Any]) -> None: + """Async version of prepare_refit_info.""" + await self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,)) + def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: """Update weights from IPC handles by delegating to the vLLM Worker implementation. @@ -1132,7 +1140,7 @@ async def update_weights_from_ipc_handles_async( traceback.print_exc() return False - def update_weights_from_collective(self, info: dict[str, Any]) -> bool: + def update_weights_from_collective(self) -> bool: """Update the model weights from collective communication.""" try: assert self.llm is not None, ( @@ -1145,7 +1153,7 @@ def update_weights_from_collective(self, info: dict[str, Any]) -> bool: ) result_or_coro = self.llm.collective_rpc( - "update_weights_from_collective", args=(info,) + "update_weights_from_collective", args=tuple() ) worker_result = result_or_coro[0] @@ -1162,7 +1170,7 @@ def update_weights_from_collective(self, info: dict[str, Any]) -> bool: traceback.print_exc() return False - async def update_weights_from_collective_async(self, info: dict[str, Any]) -> bool: + async def update_weights_from_collective_async(self) -> bool: """Async version of update_weights_from_collective.""" try: assert self.llm is not None, ( @@ -1175,7 +1183,7 @@ async def update_weights_from_collective_async(self, info: dict[str, Any]) -> bo ) result_or_coro = await self.llm.collective_rpc( - "update_weights_from_collective", args=(info,) + "update_weights_from_collective", args=tuple() ) if asyncio.iscoroutine(result_or_coro): @@ -1908,7 +1916,26 @@ def shutdown(self) -> bool: print(f"Error during policy shutdown: {e}") return False - def update_weights(self, 
ipc_handles: dict[str, Any]) -> bool: + def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: + """Prepare the info for refit.""" + # Choose the appropriate method based on async_engine setting + method_name = ( + "prepare_refit_info_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "prepare_refit_info" + ) + + # Use run_all_workers_single_data to send data to all workers + futures = self.worker_group.run_all_workers_single_data( + method_name, + state_dict_info=state_dict_info, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + + # Wait for all futures to complete + ray.get(futures) + + def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: """Update weights of the policy using IPC handles, considering tensor parallelism. For tp > 1, only the leader in each tensor parallel tied worker group will update weights. @@ -1952,9 +1979,7 @@ def update_weights(self, ipc_handles: dict[str, Any]) -> bool: print(f"Error during update weights: {e}") return False - def update_weights_from_collective( - self, info: dict[str, Any] - ) -> list[ray.ObjectRef]: + def update_weights_from_collective(self) -> list[ray.ObjectRef]: """Update weights of the policy using collective communication.""" if not self.worker_group or not self.worker_group.workers: raise RuntimeError("Worker group is not initialized") @@ -1966,10 +1991,9 @@ def update_weights_from_collective( else "update_weights_from_collective" ) - # Use run_all_workers_single_data to send data to all workers + # Use run_all_workers_single_data for methods that don't need data futures = self.worker_group.run_all_workers_single_data( method_name, - info=info, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], ) diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index 3d6ed0253c..fceea5b24f 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -12,7 
+12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Any +from typing import Any, Optional import torch @@ -51,6 +51,21 @@ def report_device_id(self) -> str: return get_device_uuid(self.device.index) + def prepare_refit_info( + self, state_dict_info: Optional[dict[str, Any]] = None + ) -> None: + """Prepare the info for refit. + + DtensorPolicyWorker: + colocated inference: state_dict_info is None + non-colocated inference: state_dict_info is a dict of {tensor_name: (shape, dtype)} + + MegatronPolicyWorker: + colocated inference: state_dict_info is a dict of {tensor_name: (shape, dtype, numel)} + non-colocated inference: not implemented yet + """ + self.state_dict_info = state_dict_info + def update_weights_from_global_ipc_handles(self, global_device_ipc_handles): """Update weights from global IPC handles. @@ -84,6 +99,11 @@ def update_weights_from_local_ipc_handles(self, local_device_ipc_handles): weights = [] if is_tensor_packed: + assert self.state_dict_info is not None, ( + "state_dict_info is not prepared. " + "Please call prepare_refit_info when initializing the worker." + ) + # Extract packed tensor from IPC handle dtype_to_packed_tensor = {} for dtype, tensor_handle in all_handles: @@ -95,7 +115,17 @@ def update_weights_from_local_ipc_handles(self, local_device_ipc_handles): # Unpack tensor to weights. Here we only return a view of the tensor to avoid # using extra memory. - for key, (shape, dtype, offset, size) in tensor_metadata.items(): + for key, metadata in tensor_metadata.items(): + # dtype for the 1st and 2nd steps may be different (e.g. 
e_score_correction_bias) + if isinstance(metadata, tuple): + # use dtype of current step + offset, dtype = metadata + shape, _, size = self.state_dict_info[key] + # update record + self.state_dict_info[key] = (shape, dtype, size) + else: + offset = metadata + shape, dtype, size = self.state_dict_info[key] tensor = dtype_to_packed_tensor[dtype][offset : offset + size].view( *shape ) @@ -118,10 +148,15 @@ def update_weights_from_local_ipc_handles(self, local_device_ipc_handles): ) return False - def update_weights_from_collective(self, info: dict[str, Any]) -> bool: + def update_weights_from_collective(self) -> bool: """Update the model weights from collective communication.""" + assert self.state_dict_info is not None, ( + "state_dict_info is not prepared. " + "Please call prepare_refit_info when initializing the worker." + ) + try: - for name, (shape, dtype) in info.items(): + for name, (shape, dtype) in self.state_dict_info.items(): weight = torch.empty(shape, dtype=dtype, device="cuda") self.model_update_group.broadcast(weight, src=0) self.model_runner.model.load_weights(weights=[(name, weight)]) diff --git a/nemo_rl/models/megatron/refit_utils.py b/nemo_rl/models/megatron/refit_utils.py index fb46030ce9..f96c6b7537 100644 --- a/nemo_rl/models/megatron/refit_utils.py +++ b/nemo_rl/models/megatron/refit_utils.py @@ -13,7 +13,7 @@ # limitations under the License. 
import re import time -from typing import Dict, List +from typing import Any, Dict, List, Tuple import torch from megatron.core import parallel_state @@ -28,6 +28,9 @@ RowParallelLinear, VocabParallelEmbedding, ) +from torch.distributed import get_process_group_ranks + +from nemo_rl.models.megatron.converters.common import get_global_key_from_local_key def get_tp_dim(model, param_name, named_modules_dict): @@ -155,3 +158,175 @@ def gather_params(model, keys, key_to_global_keys: Dict[str, List[str]]): print(f"Time taken to gather params: {time.perf_counter() - st}") return gathered_params + + +@torch.no_grad() +def get_param_info(model, dtype): + # Get parallel info + tp_group = parallel_state.get_tensor_model_parallel_group() + tp_world_size = torch.distributed.get_world_size(tp_group) + tp_group_rank_ids = get_process_group_ranks(tp_group) + + etp_group = parallel_state.get_expert_tensor_parallel_group() + etp_world_size = torch.distributed.get_world_size(etp_group) + etp_group_rank_ids = get_process_group_ranks(etp_group) + + pp_group = parallel_state.get_pipeline_model_parallel_group() + pp_world_size = torch.distributed.get_world_size(pp_group) + pp_group_rank_ids = get_process_group_ranks(pp_group) + pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() + + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + ep_group_rank_ids = get_process_group_ranks(ep_group) + + # Collect parameter info + param_info = [] + + # Dictionary of modules we can quickly look up to check if a module has TP + named_modules_dict = dict(model.named_modules()) + + # Process each parameter in the model + # state_dict includes parameters and persistent buffers + ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") + for name, param in model.state_dict().items(): + # Skip _extra_state entries (these are metadata, not actual weights) + if "_extra_state" in name: + continue + + use_etp = True if 
ep_pattern.search(name) else False + if use_etp: + tensor_mp_rank_ids = etp_group_rank_ids + else: + tensor_mp_rank_ids = tp_group_rank_ids + + shape = list(param.shape) + tp_dim = get_tp_dim(model, name, named_modules_dict) + if tp_dim is not None: + tp_rank_ids = tuple(sorted(tensor_mp_rank_ids)) + shape[tp_dim] *= len(tp_rank_ids) + else: + tp_rank_ids = (torch.distributed.get_rank(),) + + pp_rank_ids = tuple(sorted(pp_group_rank_ids)) + ep_rank_ids = tuple(sorted(ep_group_rank_ids)) + + if ep_pattern.search(name): + ep_rank_ids = tuple(sorted(ep_group_rank_ids)) + else: + ep_rank_ids = (torch.distributed.get_rank(),) + + # Calculate size for this parameter + prec_to_bytes = { + torch.bfloat16: 2, + torch.float16: 2, + torch.float32: 4, + } + scale = prec_to_bytes[dtype] / prec_to_bytes[param.dtype] + size_in_bytes = ( + param.element_size() + * param.numel() + * len(tensor_mp_rank_ids) + * len(ep_rank_ids) + * scale + ) + param_info.append( + ( + ( + name, + pp_local_rank_id, + tuple(shape), + param.dtype, + ), + size_in_bytes, + ) + ) + # Gather parameter info from all pipeline parallel ranks to ensure complete coverage + pp_group = parallel_state.get_pipeline_model_parallel_group() + pp_world_size = torch.distributed.get_world_size(pp_group) + + # Gather all parameter info from all PP ranks + pp_gathered_param_infos = [None] * pp_world_size + torch.distributed.all_gather_object( + pp_gathered_param_infos, param_info, group=pp_group + ) + pp_gathered_param_infos = [x for y in pp_gathered_param_infos for x in y] # type: ignore + + # Gather parameter info from all expert parallel ranks to ensure complete coverage + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + + # Gather all parameter info from all EP ranks + ep_gathered_param_infos = [None] * ep_world_size + torch.distributed.all_gather_object( + ep_gathered_param_infos, pp_gathered_param_infos, group=ep_group + ) + all_param_infos = 
[x for y in ep_gathered_param_infos for x in y] + + # Merge all parameter infos, keeping only unique parameter names + merged_param_info = [] + seen_params = set() + + for name, size in all_param_infos: + if name not in seen_params: + merged_param_info.append((name, size)) + seen_params.add(name) + + # Update param_info with the merged information + param_info = merged_param_info + print(f"Prepared {len(param_info)} tensors for refit") + + return param_info + + +@torch.no_grad() +def get_local_key_to_global_keys(model, state_dict_info: List[Tuple[Any, int]]): + """Get the local key to global keys mapping.""" + # Get parallel info + tp_group = parallel_state.get_tensor_model_parallel_group() + tp_world_size = torch.distributed.get_world_size(tp_group) + + pp_group = parallel_state.get_pipeline_model_parallel_group() + pp_world_size = torch.distributed.get_world_size(pp_group) + pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) + pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() + + ep_group = parallel_state.get_expert_model_parallel_group() + ep_world_size = torch.distributed.get_world_size(ep_group) + + # start calculating the global key + ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") + state_dict = model.state_dict() + final_key_to_global_keys = {} + + for param_info, size in state_dict_info: + local_key, owner_pp_local_rank_id, _, _ = param_info + + # Step 1: create global key from local key + # if: for if a parameter is sharded along PP or EP; + # else: not sharded (like embedding) + pp_gathered_objs = [None] + if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: + pp_gathered_objs[0] = get_global_key_from_local_key(local_key, model.config) + + # Step 2: gather global keys from ranks in PP group + src_global_rank = pp_global_ranks[owner_pp_local_rank_id] + torch.distributed.broadcast_object_list( + pp_gathered_objs, src=src_global_rank, group=pp_group + ) + + # Step 3: gather global keys 
from ranks in EP group + if ep_pattern.search(local_key): + ep_gathered_objs = [None] * ep_world_size + torch.distributed.all_gather_object( + ep_gathered_objs, pp_gathered_objs, group=ep_group + ) + flat_gathered_objs = [x for y in ep_gathered_objs for x in y] + else: + flat_gathered_objs = pp_gathered_objs + + final_key_to_global_keys[(local_key, owner_pp_local_rank_id)] = ( + flat_gathered_objs + ) + + return final_key_to_global_keys diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index d93385ec5f..68115dd052 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -139,13 +139,13 @@ def __init__( init_reference_model: bool = True, **kwargs: Any, ): + self.is_generation_colocated = None + if "generation" in config and config["generation"] is not None: + self.is_generation_colocated = config["generation"]["colocated"]["enabled"] + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. - if ( - "generation" in config - and config["generation"] is not None - and not config["generation"]["colocated"]["enabled"] - ): + if not self.is_generation_colocated: os.environ["NCCL_CUMEM_ENABLE"] = "1" # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) @@ -289,12 +289,6 @@ def __init__( if self.cpu_offload: self.model = self.move_to_device(self.model, "cpu") - # used for streaming update inference engine weights - self._held_sharded_state_dict_reference: Optional[dict[str, torch.Tensor]] = ( - None - ) - self._held_streamed_param_reference: Optional[dict[str, torch.Tensor]] = None - if init_reference_model: self.reference_model_state_dict = get_cpu_state_dict( self.model.state_dict().items(), pin_memory=True @@ -350,6 +344,15 @@ def __init__( "No weights path provided. 
Starting from scratch (default policy init)" ) + # vars used for refit + ## will be initialized in prepare_refit_info + self.refit_param_info = None + ## used for streaming update inference engine weights + self._held_sharded_state_dict_reference: Optional[dict[str, torch.Tensor]] = ( + None + ) + self._held_streamed_param_reference: Optional[dict[str, torch.Tensor]] = None + # Refer to nemo impl. Below is original comment. # based on https://github.com/pytorch/torchtitan/blob/main/torchtitan/distributed/utils.py#L113 @staticmethod @@ -902,6 +905,26 @@ def report_device_id(self) -> str: # Get device UUID using NVML return get_device_uuid(device_idx) + @torch.no_grad() + def prepare_refit_info(self) -> Optional[dict[str, Any]]: + state_dict = self.model.state_dict() + + if self.is_generation_colocated: + # Collect info for streaming multiple tensors + self.refit_param_info = [] + for name, tensor in state_dict.items(): + # dtensor's numel will return complete tensor instead of only local tensor + size_in_bytes = tensor.element_size() * tensor.numel() + self.refit_param_info.append((name, size_in_bytes)) + + else: + # Collect info for collective communication + state_dict_info = {} + for name, tensor in state_dict.items(): + state_dict_info[name] = (tensor.shape, self.dtype) + + return state_dict_info + @torch.no_grad() def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: """Prepare the weights for IPC. 
@@ -922,22 +945,16 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: self.model.state_dict() ) - # Collect info for streaming multiple tensors - state_dict_info = [] - for name, tensor in self._held_sharded_state_dict_reference.items(): - # dtensor's numel will return complete tensor instead of only local tensor - size_in_bytes = tensor.element_size() * tensor.numel() - state_dict_info.append((name, size_in_bytes)) - # Collect current available memory for refit ## Get current device index from torch device_idx = torch.cuda.current_device() ## Get device free memory using NVML total_available_bytes = get_free_memory_bytes(device_idx) ## Use 80% of the free memory for safety - total_available_bytes *= 0.8 + memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.8") + total_available_bytes *= float(memory_ratio) - return state_dict_info, total_available_bytes + return self.refit_param_info, total_available_bytes @torch.no_grad() def get_weights_ipc_handles(self, keys: Iterable[str]) -> dict[str, Any]: @@ -980,24 +997,6 @@ def get_weights_ipc_handles(self, keys: Iterable[str]) -> dict[str, Any]: return {device_uuid: serialized} - @torch.no_grad() - def prepare_info_for_collective(self) -> dict[str, Any]: - """Prepare the info for collective communication. - - Returns: - dict: A dictionary containing the info for collective communication. 
- """ - # Get state_dict - self.model = self.move_to_cuda(self.model) - state_dict = self.model.state_dict() - - # Collect info for collective communication - state_dict_info = {} - for name, tensor in state_dict.items(): - state_dict_info[name] = (tensor.shape, self.dtype) - - return state_dict_info - @torch.no_grad() def broadcast_weights_for_collective(self) -> None: """Broadcast the weights for collective communication.""" diff --git a/nemo_rl/models/policy/interfaces.py b/nemo_rl/models/policy/interfaces.py index 614340c67b..d63b66e735 100644 --- a/nemo_rl/models/policy/interfaces.py +++ b/nemo_rl/models/policy/interfaces.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from abc import ABC, abstractmethod -from typing import Any, TypedDict +from typing import Any, Optional, TypedDict import ray import torch @@ -109,15 +109,15 @@ def offload_after_refit(self) -> None: pass @abstractmethod - def prepare_weights_for_ipc(self, *args: Any, **kwargs: Any) -> list[list[str]]: + def prepare_refit_info(self) -> Optional[dict[str, Any]]: pass @abstractmethod - def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: + def prepare_weights_for_ipc(self, *args: Any, **kwargs: Any) -> list[list[str]]: pass @abstractmethod - def prepare_info_for_collective(self) -> dict[str, Any]: + def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: pass @abstractmethod diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index 5e82b61d72..c77f2460e7 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -405,6 +405,17 @@ def finish_training(self, *args: Any, **kwargs: Any) -> None: # Placeholder implementation pass + def prepare_refit_info(self) -> Optional[dict[str, Any]]: + """Prepare the info for refit. + + Returns: + dict: A dictionary containing the info for refit. 
+ """ + futures = self.worker_group.run_all_workers_single_data("prepare_refit_info") + results = ray.get(futures) + # Only get the first worker's info since all workers will have the same result + return results[0] + def prepare_weights_for_ipc( self, _refit_buffer_size_gb: Optional[int] = None ) -> list[list[str]]: @@ -469,19 +480,6 @@ def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: return all_handles - def prepare_info_for_collective(self) -> dict[str, Any]: - """Prepare the info for collective communication. - - Returns: - dict: A dictionary containing the info for collective communication. - """ - futures = self.worker_group.run_all_workers_single_data( - "prepare_info_for_collective" - ) - results = ray.get(futures) - # Only get the first worker's info since all workers will have the same result - return results[0] - def broadcast_weights_for_collective(self) -> list[ray.ObjectRef]: """Broadcast the weights for collective communication.""" futures = self.worker_group.run_all_workers_single_data( diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 7daa6de019..944f8d4740 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -13,13 +13,12 @@ # limitations under the License. 
import gc import os -import re import time import warnings from collections import defaultdict from contextlib import AbstractContextManager, contextmanager, nullcontext from functools import partial -from typing import Any, Iterator, List, Optional, Tuple, TypeVar +from typing import Any, Iterator, Optional, TypeVar import ray import torch @@ -83,7 +82,6 @@ reduce_max_stat_across_model_parallel_group, ) from ray.util.queue import Queue -from torch.distributed import get_process_group_ranks from transformers import PreTrainedTokenizerBase from nemo_rl.algorithms.interfaces import LossFunction, LossType @@ -100,13 +98,11 @@ forward_step_arbitrary_loss, ) from nemo_rl.models.megatron.community_import import import_model_from_hf_name -from nemo_rl.models.megatron.converters.common import ( - MegatronToHFConverter, - get_global_key_from_local_key, -) +from nemo_rl.models.megatron.converters.common import MegatronToHFConverter from nemo_rl.models.megatron.refit_utils import ( gather_params, - get_tp_dim, + get_local_key_to_global_keys, + get_param_info, ) from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import ( @@ -681,14 +677,8 @@ def __init__( ) self.final_padded_vocab_size = tokenizer_config.padded_vocab_size self.dp_size = worker_sharding_annotations.get_axis_size("data_parallel") - self._held_gather_buffer = None self.megatron_to_hf_converter = MegatronToHFConverter(hf_model_name, self.model) - # Create a map that maps any local parameter name to a list of global parameter names. - # This map is repeatedly used by parameter gatherring phase during refit of every step. 
- self.local_key_to_global_keys = self.get_local_key_to_global_keys( - state_dict_info=self.prepare_weights_for_ipc()[0] - ) self.should_disable_forward_pre_hook = ( self.cfg["megatron_cfg"]["optimizer"]["use_distributed_optimizer"] and self.cfg["megatron_cfg"]["distributed_data_parallel_config"][ @@ -696,6 +686,13 @@ def __init__( ] ) + # vars used for refit + ## will be initialized in prepare_refit_info + self.refit_param_info_hf = None + self.local_key_to_global_keys = None + ## used for streaming update inference engine weights + self._held_gather_buffer = None + def is_alive(self): return True @@ -1263,59 +1260,43 @@ def report_device_id(self) -> str: return get_device_uuid(device_idx) @torch.no_grad() - def get_local_key_to_global_keys(self, state_dict_info: List[Tuple[Any, int]]): - """Get the local key to global keys mapping.""" - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # start calculating the global key - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - state_dict = self.model.state_dict() - final_key_to_global_keys = {} - - for param_info, size in state_dict_info: - local_key, owner_pp_local_rank_id, _, _ = param_info - - # Step 1: create global key from local key - # if: for if a parameter is sharded along PP or EP; - # else: not sharded (like embedding) - pp_gathered_objs = [None] - if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: - pp_gathered_objs[0] = get_global_key_from_local_key( - local_key, self.model.config - ) - 
- # Step 2: gather global keys from ranks in PP group - src_global_rank = pp_global_ranks[owner_pp_local_rank_id] - torch.distributed.broadcast_object_list( - pp_gathered_objs, src=src_global_rank, group=pp_group - ) + def prepare_refit_info(self) -> None: + # Get parameter info for refit + ## param_info: list of ((name, shape, dtype), size_in_bytes) tuples + # Cannot cache refit_param_info_mcore since dtype and size_in_bytes for the 1st and 2nd steps may be different + ## e.g. e_score_correction_bias + refit_param_info_mcore = get_param_info(self.model, self.dtype) - # Step 3: gather global keys from ranks in EP group - if ep_pattern.search(local_key): - ep_gathered_objs = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_objs, pp_gathered_objs, group=ep_group - ) - flat_gathered_objs = [x for y in ep_gathered_objs for x in y] - else: - flat_gathered_objs = pp_gathered_objs + # Create a map that maps any local parameter name to a list of global parameter names. + # This map is repeatedly used by parameter gatherring phase during refit of every step. 
+ self.local_key_to_global_keys = get_local_key_to_global_keys( + self.model, state_dict_info=refit_param_info_mcore + ) - final_key_to_global_keys[(local_key, owner_pp_local_rank_id)] = ( - flat_gathered_objs + # Collect tensor metadata for refit + self.refit_param_info_hf = {} + for key, _ in refit_param_info_mcore: + # gather megatron params + gathered_megatron_params = gather_params( + self.model, + [key], + key_to_global_keys=self.local_key_to_global_keys, ) + # convert to hf params + gathered_hf_params = self.megatron_to_hf_converter.convert( + gathered_megatron_params, self.model.config + ) + # collect tensor metadata + for name, tensor in gathered_hf_params.items(): + self.refit_param_info_hf[name] = ( + tensor.shape, + tensor.dtype, + tensor.numel(), + ) - return final_key_to_global_keys + return self.refit_param_info_hf + @torch.no_grad() def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: """Prepare Megatron model weights for IPC transfer to vLLM. @@ -1324,139 +1305,25 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: """ from nemo_rl.utils.nvml import get_free_memory_bytes - no_grad = torch.no_grad() - no_grad.__enter__() - # Ensure model is in evaluation mode - self.model.eval() - - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - tp_group_rank_ids = get_process_group_ranks(tp_group) - - etp_group = parallel_state.get_expert_tensor_parallel_group() - etp_world_size = torch.distributed.get_world_size(etp_group) - etp_group_rank_ids = get_process_group_ranks(etp_group) - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_group_rank_ids = get_process_group_ranks(pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = 
torch.distributed.get_world_size(ep_group) - ep_group_rank_ids = get_process_group_ranks(ep_group) - - # Collect parameter info - param_info = [] - - # Dictionary of modules we can quickly look up to check if a module has TP - named_modules_dict = dict(self.model.named_modules()) - - # Process each parameter in the model - # state_dict includes parameters and persistent buffers - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - for name, param in self.model.state_dict().items(): - # Skip _extra_state entries (these are metadata, not actual weights) - if "_extra_state" in name: - continue - - use_etp = True if ep_pattern.search(name) else False - if use_etp: - tensor_mp_rank_ids = etp_group_rank_ids - else: - tensor_mp_rank_ids = tp_group_rank_ids - - shape = list(param.shape) - tp_dim = get_tp_dim(self.model, name, named_modules_dict) - if tp_dim is not None: - tp_rank_ids = tuple(sorted(tensor_mp_rank_ids)) - shape[tp_dim] *= len(tp_rank_ids) - else: - tp_rank_ids = (torch.distributed.get_rank(),) - - pp_rank_ids = tuple(sorted(pp_group_rank_ids)) - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - - if ep_pattern.search(name): - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - else: - ep_rank_ids = (torch.distributed.get_rank(),) - - # Calculate size for this parameter - prec_to_bytes = { - torch.bfloat16: 2, - torch.float16: 2, - torch.float32: 4, - } - scale = prec_to_bytes[self.dtype] / prec_to_bytes[param.dtype] - size_in_bytes = ( - param.element_size() - * param.numel() - * len(tensor_mp_rank_ids) - * len(ep_rank_ids) - * scale - ) - param_info.append( - ( - ( - name, - pp_local_rank_id, - tuple(shape), - param.dtype, - ), - size_in_bytes, - ) - ) - # Gather parameter info from all pipeline parallel ranks to ensure complete coverage - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - - # Gather all parameter info from all PP ranks - pp_gathered_param_infos = [None] * 
pp_world_size - torch.distributed.all_gather_object( - pp_gathered_param_infos, param_info, group=pp_group - ) - pp_gathered_param_infos = [x for y in pp_gathered_param_infos for x in y] # type: ignore - - # Gather parameter info from all expert parallel ranks to ensure complete coverage - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # Gather all parameter info from all EP ranks - ep_gathered_param_infos = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_param_infos, pp_gathered_param_infos, group=ep_group - ) - all_param_infos = [x for y in ep_gathered_param_infos for x in y] - - # Merge all parameter infos, keeping only unique parameter names - merged_param_info = [] - seen_params = set() - - for name, size in all_param_infos: - if name not in seen_params: - merged_param_info.append((name, size)) - seen_params.add(name) - - # Update param_info with the merged information - param_info = merged_param_info - - print(f"Prepared {len(param_info)} tensors for IPC transfer") - no_grad.__exit__(None, None, None) + # Get parameter info for refit + ## param_info: list of ((name, shape, dtype), size_in_bytes) tuples + # Cannot cache refit_param_info_mcore since dtype and size_in_bytes for the 1st and 2nd steps may be different + ## e.g. 
e_score_correction_bias + refit_param_info_mcore = get_param_info(self.model, self.dtype) # Collect current available memory for refit ## Get current device index from torch device_idx = torch.cuda.current_device() ## Get device free memory using NVML total_available_bytes = get_free_memory_bytes(device_idx) - # TODO: setting to low value (10%) since - # more buckets seems to have better perf - total_available_bytes *= 0.1 + ## default to 20% to get some more speedup than 10%, OOM if set to 30% + memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.2") + total_available_bytes *= float(memory_ratio) - return param_info, total_available_bytes + return refit_param_info_mcore, total_available_bytes # Temporary fix, 'keys' is a kwarg due to some sort of ray bug + @torch.no_grad() def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: """Get IPC handles for the requested Megatron model weights. @@ -1498,14 +1365,23 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: type_to_total_size = defaultdict(lambda: 0) tensor_metadata = dict() + # Record offset of the tensor for key, tensor in gathered_hf_params.items(): - tensor_metadata[key] = ( - tensor.shape, # shape of the tensor - tensor.dtype, # dtype of the tensor - type_to_total_size[tensor.dtype], # offset of the tensor - # in packed buffer - tensor.numel(), # size of the tensor - ) + # dtype for the 1st and 2nd steps may be different (e.g. 
e_score_correction_bias) + if tensor.dtype == self.refit_param_info_hf[key][1]: + tensor_metadata[key] = type_to_total_size[tensor.dtype] + else: + # also send dtype if it changes + tensor_metadata[key] = ( + type_to_total_size[tensor.dtype], + tensor.dtype, + ) + # update record + self.refit_param_info_hf[key] = ( + tensor.shape, + tensor.dtype, + tensor.numel(), + ) type_to_total_size[tensor.dtype] += tensor.numel() # Allocate consolidated tensors for each dtype @@ -1521,8 +1397,11 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: # Copy tensors into consolidated buffers for key, tensor in gathered_hf_params.items(): - metadata = tensor_metadata[key] - _, dtype, offset, size = metadata + offset = tensor_metadata[key] + if isinstance(offset, tuple): + offset, _ = offset + dtype = tensor.dtype + size = tensor.numel() packed_tensors[dtype][offset : offset + size].copy_( tensor.detach().view(-1) ) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 626fce2b6f..8d6cff05f8 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -41,7 +41,7 @@ "name": model_name, }, "dtype": "bfloat16", - "max_new_tokens": 5, + "max_new_tokens": 5, # Small number of tokens for testing "temperature": 0.8, "top_p": 1.0, "top_k": None, @@ -133,15 +133,6 @@ def get_basic_megatron_test_config( "learning_rate": 5e-6, "logprob_batch_size": 2, "precision": precision, - "generation": { - "backend": "megatron", - "temperature": 1.0, - "max_new_tokens": 16, # Small number of tokens for testing - "top_p": 1.0, - "top_k": None, - "stop_token_ids": None, - "stop_strings": None, - }, "dtensor_cfg": { "enabled": False, # Disabled for Megatron tests }, @@ -202,6 +193,7 @@ def get_basic_megatron_test_config( "optimizer": None, # Remove default FSDP optimizer "scheduler": None, # Remove default scheduler "max_grad_norm": 1.0, + 
"generation": deepcopy(basic_vllm_test_config), } @@ -426,11 +418,18 @@ async def test_vllm_policy_generation_async( dtensor_config = basic_dtensor_test_config from nemo_rl.models.policy.lm_policy import Policy + print("creating vllm policy...") async_policy = VllmGeneration(cluster, vllm_config) async_policy.finish_generation() - print("creating hf policy...") + print("creating lm policy...") lm_policy = Policy(cluster, dtensor_config, tokenizer) + + print("preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + async_policy.prepare_refit_info(state_dict_info) + + print("refitting vllm policy...") refit_policy_generation( lm_policy, async_policy, vllm_config["colocated"]["enabled"] ) @@ -528,6 +527,9 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): dtensor_config = basic_dtensor_test_config lm_policy = Policy(cluster, dtensor_config, tokenizer) + state_dict_info = lm_policy.prepare_refit_info() + policy.prepare_refit_info(state_dict_info) + print("refitting vllm policy...") refit_policy_generation(lm_policy, policy, vllm_config["colocated"]["enabled"]) @@ -682,6 +684,10 @@ async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine print("Creating DTensor policy...") lm_policy = Policy(cluster, dtensor_config, tokenizer) + print("preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + print("refitting vllm policy...") refit_policy_generation( lm_policy, vllm_policy, vllm_config["colocated"]["enabled"] @@ -930,9 +936,14 @@ def test_vllm_weight_update_and_prefix_cache_reset( try: print(f"Creating DTensor policy for TP={tensor_parallel_size}...") lm_policy = Policy(cluster, dtensor_config, tokenizer) + print(f"Creating vLLM policy for TP={tensor_parallel_size}...") vllm_policy = VllmGeneration(cluster, vllm_config) + print("preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + 
vllm_policy.prepare_refit_info(state_dict_info) + # Prepare input data (batch size 2) text = """Answer the question based on the context below. Keep the answer short and concise. Respond "Unsure about answer" if not sure about the answer. Context: Teplizumab traces its roots to a New Jersey drug company called Ortho Pharmaceutical. There, scientists generated an early version of the antibody, dubbed OKT3. Originally sourced from mice, the molecule was able to bind to the surface of T cells and limit their cell-killing potential. In 1986, it was approved to help prevent organ rejection after kidney transplants, making it the first therapeutic antibody allowed for human use.Question: What was OKT3 originally sourced from?Answer:""" test_prompt = [text, text] # Use batch size 2 @@ -970,7 +981,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( grouped_param_keys = lm_policy.prepare_weights_for_ipc() for keys in grouped_param_keys: ipc_handles = lm_policy.get_weights_ipc_handles(keys) - update_success = vllm_policy.update_weights(ipc_handles) + update_success = vllm_policy.update_weights_from_ipc_handles(ipc_handles) assert update_success, "Weight update should succeed" print("vLLM weights successfully updated.") @@ -1035,6 +1046,10 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): dtensor_config = basic_dtensor_test_config lm_policy = Policy(cluster, dtensor_config, tokenizer) + print("preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + print("refitting vllm policy...") # take it outside statistics to get clean peak memory during refit lm_policy.offload_before_refit() @@ -1112,6 +1127,10 @@ def test_vllm_generation_with_stop( dtensor_config = basic_dtensor_test_config lm_policy = Policy(cluster, dtensor_config, tokenizer) + print("preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + vllm_generation.prepare_refit_info(state_dict_info) + 
print("refitting vllm policy...") refit_policy_generation( lm_policy, vllm_generation, vllm_config["colocated"]["enabled"] @@ -1219,6 +1238,10 @@ async def test_vllm_refit_non_collocated_update_weights( futures_inference = vllm_generation.init_collective(ip, port, world_size=2) ray.get(futures_train + futures_inference) + # prepare refit info + state_dict_info = lm_policy.prepare_refit_info() + vllm_generation.prepare_refit_info(state_dict_info) + print("refitting vllm policy...") refit_policy_generation( lm_policy, vllm_generation, vllm_config["colocated"]["enabled"] @@ -1317,6 +1340,10 @@ def test_vllm_generation_with_megatron_training( print("Creating Megatron policy...") megatron_policy = Policy(cluster, megatron_config, test_tokenizer) + print("preparing refit info...") + state_dict_info = megatron_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + print("Refitting vLLM policy with Megatron weights...") refit_policy_generation( megatron_policy, vllm_policy, vllm_config["colocated"]["enabled"] @@ -1436,6 +1463,10 @@ def test_vllm_megatron_weight_update_memory(cluster, tokenizer): print("Creating Megatron policy...") megatron_policy = Policy(cluster, megatron_config, test_tokenizer) + print("preparing refit info...") + state_dict_info = megatron_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + print("Refitting vLLM policy with Megatron...") # Take it outside statistics to get clean peak memory during refit megatron_policy.offload_before_refit() @@ -1539,6 +1570,10 @@ def test_vllm_megatron_pipeline_parallel(cluster, tokenizer): vllm_policy = VllmGeneration(cluster, vllm_config) vllm_policy.finish_generation() + print("preparing refit info...") + state_dict_info = megatron_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + print("Refitting vLLM with Megatron PP=2 weights...") refit_policy_generation( megatron_policy, vllm_policy, vllm_config["colocated"]["enabled"] @@ -1569,3 
+1604,58 @@ def test_vllm_megatron_pipeline_parallel(cluster, tokenizer): vllm_policy.shutdown() if megatron_policy: megatron_policy.shutdown() + + +def test_vllm_megatron_weight_update_with_packing(cluster, test_input_data): + megatron_policy = None + vllm_generation = None + + try: + # Enable packing during test + os.environ["NEMO_RL_MEGATRON_IPC_TENSOR_PACKING_THRESHOLD"] = "1" + + # Both policies must use the same model (Qwen2.5-0.5B) for weight transfer compatibility + model_name = "Qwen/Qwen2.5-0.5B" + tokenizer = get_tokenizer({"name": model_name}) + + # Create Policy + megatron_config = get_basic_megatron_test_config( + tp=1, pp=1, precision="float32" + ) + megatron_config["model_name"] = model_name + megatron_config["tokenizer"]["name"] = model_name + megatron_policy = Policy(cluster, megatron_config, tokenizer) + + # Create VllmGeneration + vllm_config = deepcopy(basic_vllm_test_config) + vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) + vllm_config["model_name"] = model_name + vllm_config["tokenizer"]["name"] = model_name + vllm_generation = VllmGeneration(cluster, vllm_config) + + # prepare refit info + state_dict_info = megatron_policy.prepare_refit_info() + vllm_generation.prepare_refit_info(state_dict_info) + + print("refitting vllm policy...") + refit_policy_generation( + megatron_policy, vllm_generation, vllm_config["colocated"]["enabled"] + ) + + # test generate + outputs = vllm_generation.generate(test_input_data, greedy=True) + output_ids = outputs["output_ids"] + generated_texts = tokenizer.batch_decode(output_ids, skip_special_tokens=True) + assert generated_texts == [ + "Hello, my name is John. I am a", + "The capital of France is Paris. 
It is the", + ], "Output should be the same as the expected output" + + finally: + # Restore the original value + os.environ.pop("NEMO_RL_MEGATRON_IPC_TENSOR_PACKING_THRESHOLD", None) + # Clean up + if megatron_policy: + megatron_policy.shutdown() + if vllm_generation: + vllm_generation.shutdown() diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index a23c1b5559..ee1d422a3b 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -60,6 +60,13 @@ def create_megatron_test_config( "top_k": None, "stop_token_ids": None, "stop_strings": None, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, }, "dtensor_cfg": { "enabled": False, # Disabled for Megatron tests From 6b9710012ad4240782af68c6ad73f604c636302e Mon Sep 17 00:00:00 2001 From: Xuehan Xiong Date: Wed, 16 Jul 2025 17:49:15 -0700 Subject: [PATCH 44/59] docs: update converter path in README. 
(#672) Signed-off-by: Xuehan Signed-off-by: Jialei Chen --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3cd472c6ad..2110f255de 100644 --- a/README.md +++ b/README.md @@ -351,7 +351,7 @@ If you have trained a model and saved the checkpoint in the Pytorch DCP format, ```sh # Example for a GRPO checkpoint at step 170 -uv run python examples/convert_dcp_to_hf.py \ +uv run python examples/converters/convert_dcp_to_hf.py \ --config results/grpo/step_170/config.yaml \ --dcp-ckpt-path results/grpo/step_170/policy/weights/ \ --hf-ckpt-path results/grpo/hf From c8115f93fba1d92acb8f515c15c5a42d9a8969d3 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Wed, 16 Jul 2025 20:45:17 -0700 Subject: [PATCH 45/59] fix: make mcore lr scheduler configuration consistent with dtensor (#681) Signed-off-by: ashors1 Signed-off-by: Anna Shors Co-authored-by: Parth Chadha Signed-off-by: Jialei Chen --- examples/configs/grpo_math_1B_megatron.yaml | 2 +- examples/configs/grpo_math_70B_megatron.yaml | 2 +- examples/configs/grpo_math_qwen30ba3b_megatron.yaml | 2 +- nemo_rl/models/policy/megatron_policy_worker.py | 11 ++++++----- tests/unit/models/policy/test_megatron_worker.py | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index 7a8a651a54..600dbfc41c 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -116,7 +116,7 @@ policy: weight_decay_incr_style: "constant" lr_decay_style: "constant" lr_decay_iters: null - lr_warmup_iters: 50 + lr_warmup_iters: 13 lr_warmup_init: 5.0e-7 distributed_data_parallel_config: diff --git a/examples/configs/grpo_math_70B_megatron.yaml b/examples/configs/grpo_math_70B_megatron.yaml index a7ba2c8a52..1aaad35659 100644 --- a/examples/configs/grpo_math_70B_megatron.yaml +++ b/examples/configs/grpo_math_70B_megatron.yaml @@ -49,7 +49,7 @@ 
policy: weight_decay_incr_style: "constant" lr_decay_style: "constant" lr_decay_iters: null - lr_warmup_iters: 50 + lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 generation: diff --git a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml index 915babbf5c..8ebd93e7a1 100644 --- a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml +++ b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml @@ -52,7 +52,7 @@ policy: weight_decay_incr_style: "constant" lr_decay_style: "constant" lr_decay_iters: null - lr_warmup_iters: 50 + lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 env_vars: diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 944f8d4740..a6cfe9083a 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -854,13 +854,8 @@ def train( num_zeros_in_grad ) - # Update learning rate. if update_successful: - increment = total_dataset_size.item() - self.scheduler.step(increment=increment) skipped_iter = 0 - curr_lr = self.scheduler.get_lr(self.optimizer.param_groups[0]) - curr_wd = self.scheduler.get_wd() else: skipped_iter = 1 @@ -877,6 +872,8 @@ def train( for k in x.keys(): loss_metrics[k] = x[k] / num_global_batches gb_loss_metrics.append(loss_metrics) + curr_lr = self.scheduler.get_lr(self.optimizer.param_groups[0]) + curr_wd = self.scheduler.get_wd() loss_metrics["lr"] = curr_lr loss_metrics["wd"] = curr_wd loss_metrics["grad_norm"] = grad_norm @@ -902,6 +899,10 @@ def train( all_mb_metrics.extend(gb_loss_metrics) losses.append(torch.tensor(mb_losses).sum().item()) + if not eval_mode: + # take one LR step every rollout batch + self.scheduler.step(increment=1) + # Aggregate metrics across all microbatches mb_metrics = defaultdict(list) for m in all_mb_metrics: diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index 
ee1d422a3b..ea1c70f9b3 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -784,8 +784,8 @@ def test_megatron_reference_policy_functionality(): ) config = create_megatron_test_config() - config["megatron_cfg"]["optimizer"]["lr"] = 1e-3 # Increase from 5e-6 to 1e-3 - config["megatron_cfg"]["optimizer"]["min_lr"] = 1e-4 # Increase min_lr as well + config["megatron_cfg"]["optimizer"]["lr"] = 1e-2 # Increase from 5e-6 to 1e-2 + config["megatron_cfg"]["optimizer"]["min_lr"] = 1e-3 # Increase min_lr as well tokenizer = get_tokenizer(config["tokenizer"]) config["generation"] = configure_generation_config(config["generation"], tokenizer) From 88a9429039b8f24f259597d707e82a9d01e54765 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Thu, 17 Jul 2025 12:04:33 -0700 Subject: [PATCH 46/59] fix: fix mcore LR increment (#685) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- nemo_rl/models/policy/megatron_policy_worker.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index a6cfe9083a..0bf4e71477 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -901,7 +901,9 @@ def train( if not eval_mode: # take one LR step every rollout batch - self.scheduler.step(increment=1) + # we need to scale the step by gbs to counteract the fact that NeMo automatically + # scales lr_warmup_steps by gbs during init + self.scheduler.step(increment=gbs) # Aggregate metrics across all microbatches mb_metrics = defaultdict(list) From a0df2efb90d45b2a07eae44e1815a2eb28234d74 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Fri, 18 Jul 2025 09:48:35 -0700 Subject: [PATCH 47/59] fix: upgrade datasets to fix squad download (#692) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 
5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b9b0ad1191..945ee9acda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "transformers>=4.51.0", "wandb", "numpy", - "datasets>=3.6.0", + "datasets>=4.0.0", "rich", "math-verify", "accelerate>=0.26", diff --git a/uv.lock b/uv.lock index fdce004e44..cfda06c29d 100644 --- a/uv.lock +++ b/uv.lock @@ -734,7 +734,7 @@ wheels = [ [[package]] name = "datasets" -version = "3.6.0" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "dill" }, @@ -751,9 +751,9 @@ dependencies = [ { name = "tqdm" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336, upload-time = "2025-05-07T15:15:02.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e3/9d/348ed92110ba5f9b70b51ca1078d4809767a835aa2b7ce7e74ad2b98323d/datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1", size = 569566, upload-time = "2025-07-09T14:35:52.431Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546, upload-time = "2025-05-07T15:14:59.742Z" }, + { url = "https://files.pythonhosted.org/packages/eb/62/eb8157afb21bd229c864521c1ab4fa8e9b4f1b06bafdd8c4668a7a31b5dd/datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d", size = 494825, upload-time = "2025-07-09T14:35:50.658Z" }, ] [[package]] @@ -2381,7 +2381,7 @@ requires-dist = [ { name = "accelerate", specifier = ">=0.26" }, { name = "blobfile" }, { name = "colored", specifier = "==2.2.3" 
}, - { name = "datasets", specifier = ">=3.6.0" }, + { name = "datasets", specifier = ">=4.0.0" }, { name = "debugpy" }, { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.7.4.post1" }, { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.7.4.post1" }, From 22a984a386a96e22e0a64343744fb1c69443ae5f Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Fri, 18 Jul 2025 09:49:42 -0700 Subject: [PATCH 48/59] fix: Megatron config updates to avoid OOM (#687) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- examples/configs/grpo_math_70B_megatron.yaml | 2 +- examples/configs/grpo_math_8B_megatron.yaml | 2 +- examples/configs/grpo_math_qwen30ba3b_megatron.yaml | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/configs/grpo_math_70B_megatron.yaml b/examples/configs/grpo_math_70B_megatron.yaml index 1aaad35659..1317e45a04 100644 --- a/examples/configs/grpo_math_70B_megatron.yaml +++ b/examples/configs/grpo_math_70B_megatron.yaml @@ -62,7 +62,7 @@ policy: stop_strings: null vllm_cfg: tensor_parallel_size: 4 - gpu_memory_utilization: 0.8 + gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} cluster: diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index fc839c8239..62ec41359c 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -67,7 +67,7 @@ policy: stop_strings: null vllm_cfg: tensor_parallel_size: 1 - gpu_memory_utilization: 0.8 + gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} cluster: diff --git a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml index 8ebd93e7a1..84d6736cec 100644 --- a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml +++ b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml @@ -29,11 +29,11 @@ policy: enabled: true empty_unused_memory_level: 1 converter_type: 
"LlamaForCausalLM" - tensor_model_parallel_size: 4 - pipeline_model_parallel_size: 4 + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 1 context_parallel_size: 1 expert_tensor_parallel_size: 1 - expert_model_parallel_size: 4 + expert_model_parallel_size: 8 sequence_parallel: True pipeline_dtype: ${policy.precision} @@ -68,7 +68,8 @@ policy: stop_strings: null vllm_cfg: tensor_parallel_size: 4 - gpu_memory_utilization: 0.8 + gpu_memory_utilization: 0.7 + enforce_eager: false max_model_len: ${policy.max_total_sequence_length} cluster: From 00e33a995d5a69eb0433be08a123ed0bcbfce6e9 Mon Sep 17 00:00:00 2001 From: Anna Shors Date: Fri, 18 Jul 2025 11:55:37 -0700 Subject: [PATCH 49/59] fix: fix lr scheduler for config that was missed in #681 (#693) Signed-off-by: ashors1 Signed-off-by: Jialei Chen --- examples/configs/grpo_math_8B_megatron.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index 62ec41359c..ef0e932b0c 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -54,7 +54,7 @@ policy: weight_decay_incr_style: "constant" lr_decay_style: "constant" lr_decay_iters: null - lr_warmup_iters: 50 + lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 generation: From c4b51513fb9c14212f2a83e87130576cc7bcc4f8 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Fri, 18 Jul 2025 17:12:33 -0700 Subject: [PATCH 50/59] fix: Fix gemma models broken by HF update (#676) Signed-off-by: Yi-Fu Wu Signed-off-by: Jialei Chen --- nemo_rl/models/dtensor/parallelize.py | 4 +- nemo_rl/models/generation/vllm_backend.py | 45 +++++++++++- tests/unit/models/dtensor/test_parallelize.py | 68 +++++++++++++++++++ 3 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 tests/unit/models/dtensor/test_parallelize.py diff --git a/nemo_rl/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py index 
8004d1fbff..f668834f19 100644 --- a/nemo_rl/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -92,7 +92,7 @@ def _parallelize_gemma3( Tensor parallelism is not supported for Gemma3 models because of tied word embeddings. """ if isinstance(model, Gemma3ForConditionalGeneration): - model_prefix = "language_model" + model_prefix = "model.language_model" else: model_prefix = "model" @@ -127,7 +127,7 @@ def _parallelize_gemma3( ), f"{model_prefix}.layers.*.post_feedforward_layernorm": SequenceParallel(), f"{model_prefix}.norm": SequenceParallel(), - f"{model_prefix}.lm_head": PrepareModuleInput( + "lm_head": PrepareModuleInput( input_layouts=(Shard(1),), desired_input_layouts=(Replicate(),), use_local_output=True, diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py index fceea5b24f..57c5f7dc1d 100644 --- a/nemo_rl/models/generation/vllm_backend.py +++ b/nemo_rl/models/generation/vllm_backend.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Any, Optional +from typing import Any, Iterable, Optional import torch @@ -26,6 +26,49 @@ ) +def _patch_gemma3_mm(): + """Patch gemma3_mm.py to support new HF multimodal format (post transformers v4.52). + + Patch taken from:https://github.com/vllm-project/vllm/pull/19151/files#diff-5890909300e4e6c3160444e4587ec3fd80498bb83f598b22ce81337f75992b06 + """ + from packaging.version import Version as PkgVersion + + assert PkgVersion(vllm.__version__) < PkgVersion("0.9.2"), ( + f"You are using vllm version {vllm.__version__}. " + "Please remove this patch (_patch_gemma3_mm in nemo_rl/models/generation/vllm_backend.py) " + "since it is included in vllm>=0.9.2." 
+ ) + + from vllm.logger import init_logger + from vllm.model_executor.models import gemma3_mm + from vllm.model_executor.models.utils import ( + AutoWeightsLoader, + WeightsMapper, + ) + + logger = init_logger("gemma3_mm_patch") + + gemma3_mm.Gemma3ForConditionalGeneration.hf_to_vllm_mapper = WeightsMapper( + orig_to_new_prefix={ + # mapping for new names in checkpoint saved after transformers v4.52 + "model.language_model.": "language_model.model.", + "model.vision_tower.": "vision_tower.", + "model.multi_modal_projector.": "multi_modal_projector.", + "lm_head.": "language_model.lm_head.", + } + ) + + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader(self) + return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) + + gemma3_mm.Gemma3ForConditionalGeneration.load_weights = load_weights + logger.info("Successfully patched gemma3_mm.py in vllm_backend.") + + +_patch_gemma3_mm() + + class VllmInternalWorkerExtension: def init_collective( self, rank_prefix: int, ip: str, port: int, world_size: int diff --git a/tests/unit/models/dtensor/test_parallelize.py b/tests/unit/models/dtensor/test_parallelize.py new file mode 100644 index 0000000000..5acb7addc4 --- /dev/null +++ b/tests/unit/models/dtensor/test_parallelize.py @@ -0,0 +1,68 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from itertools import product +from unittest.mock import MagicMock + +import pytest +from torch.distributed.tensor.parallel import ParallelStyle, parallelize_module +from transformers import AutoModelForCausalLM + +from nemo_rl.models.dtensor.parallelize import ( + _parallelize_gemma3, + _parallelize_llama, + _parallelize_qwen, +) + + +@pytest.mark.parametrize( + "model_name, parallelize_func, sequence_parallel", + [ + (model_name, parallelize_func, sp) + for (model_name, parallelize_func), sp in product( + [ + ("google/gemma-3-1b-it", _parallelize_gemma3), + ("google/gemma-3-4b-it", _parallelize_gemma3), + # ("Qwen/Qwen2.5-1.5B", _parallelize_qwen), # TODO: qwen2 doesn't have q_norm and k_norm, which will cause this test to fail + ("Qwen/Qwen3-0.6B", _parallelize_qwen), + ("meta-llama/Llama-3.2-1B-Instruct", _parallelize_llama), + ], + [True, False], + ) + ], +) +def test_parallelize_plan_keys(model_name, parallelize_func, sequence_parallel): + """Tests that the keys in the parallelization plans are valid by mocking parallel styles.""" + model = AutoModelForCausalLM.from_pretrained(model_name) + parallel_plan = parallelize_func(model, sequence_parallel=sequence_parallel) + + applied_keys = set() + + class MockParallelStyle(ParallelStyle): + def __init__(self, key, collector): + self.key = key + self.collector = collector + + def _apply(self, module, device_mesh): + self.collector.add(self.key) + + mock_plan = {key: MockParallelStyle(key, applied_keys) for key in parallel_plan} + dummy_device_mesh = MagicMock() + dummy_device_mesh.ndim = 1 + + parallelize_module(model, dummy_device_mesh, mock_plan) + + assert set(parallel_plan.keys()) == applied_keys, ( + f"Missing keys: {set(parallel_plan.keys()) - applied_keys}" + ) From 6a22c8aa17e4352c86ef67037d68c1df0e5533d7 Mon Sep 17 00:00:00 2001 From: yuki <48991475+yuki-666@users.noreply.github.com> Date: Sat, 19 Jul 2025 13:19:47 +0800 Subject: [PATCH 51/59] chore: add CP+SP (sequence parallel) assertion in DTensor 
worker (#689) Signed-off-by: Yuki Huang Co-authored-by: Terry Kong Signed-off-by: Jialei Chen --- docs/model-quirks.md | 5 +++- .../models/policy/dtensor_policy_worker.py | 17 +++++++++++-- .../unit/models/policy/test_dtensor_worker.py | 25 ++++++++++++------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/docs/model-quirks.md b/docs/model-quirks.md index 7824e8bf78..ec37048469 100644 --- a/docs/model-quirks.md +++ b/docs/model-quirks.md @@ -31,9 +31,12 @@ NeMo-RL uses the vLLM V1 runtime for both synchronous and asynchronous inference ### Context Parallel with FSDP2 -NeMo-RL implemented this feature based on torch CP [implementation](https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/experimental/_attention.py). And we inherit its limitations. +- NeMo-RL implemented this feature based on torch CP [implementation](https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/experimental/_attention.py). And we inherit its limitations. Whether model level support CP only depends on arguments passed to `torch.nn.functional.scaled_dot_product_attention`. Current NeMo-RL passed all ones attention mask to `model.forward`. For Gemma-3, it won't ignore attention mask as result `attn_bias` is not None which is not supported by torch CP. Please see [assertion](https://github.com/pytorch/pytorch/blob/134179474539648ba7dee1317959529fbd0e7f89/torch/distributed/tensor/experimental/_attention.py#L262) . +- It's a known issue that context parallel can't be used together with sequence parallel. +Refer to [here](https://github.com/NVIDIA-NeMo/RL/issues/659) for more details. + ## vLLM Async Rollout Timeout vLLM async generation has a configurable timeout for waiting for individual sample results. This is particularly important for longer sequences on large models. 
diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 68115dd052..f501c978c5 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -217,13 +217,26 @@ def __init__( tp_size = self.cfg["dtensor_cfg"]["tensor_parallel_size"] cp_size = self.cfg["dtensor_cfg"]["context_parallel_size"] dp_size = world_size // tp_size // cp_size + sequence_parallel_enabled = self.cfg["dtensor_cfg"]["sequence_parallel"] assert world_size == dp_size * tp_size * cp_size, ( f"World size({world_size}) must equal to dp_size({dp_size}) * tp_size({tp_size}) * cp_size({cp_size}) to use DTensor" ) + if sequence_parallel_enabled and tp_size == 1: + print( + "[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. Enable tp_size > 1 to use sequence parallelism." + ) + if cp_size > 1: assert not isinstance(self.model, Gemma3ForCausalLM), ( - "Context parallel is not supported for Gemma3ForCausalLM. Torch context parallel has many limitations. Please refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details." + "Context parallel is not supported for Gemma3ForCausalLM. Torch context parallel has many limitations. " + "Please refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details." + ) + + assert not (tp_size > 1 and sequence_parallel_enabled), ( + "It's a known issue that context parallel can't be used together with sequence parallel in DTensor worker. " + "Please either set cp_size = 1 or disable sequence parallel. " + "See https://github.com/NVIDIA-NeMo/RL/issues/659 for more details." 
) device_mesh = torch.distributed.device_mesh.init_device_mesh( @@ -247,7 +260,7 @@ def __init__( self.dp_cp_mesh, self.tp_mesh, param_dtype=self.dtype, - sequence_parallel=self.cfg["dtensor_cfg"]["sequence_parallel"], + sequence_parallel=sequence_parallel_enabled, cpu_offload=self.cpu_offload, activation_checkpointing=self.cfg["dtensor_cfg"][ "activation_checkpointing" diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index e208873353..fcd0977117 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -42,7 +42,7 @@ def create_test_config( sequence_parallel: bool = False, cpu_offload: bool = False, activation_checkpointing: bool = False, - custom_parallel_plan: str = None, + custom_parallel_plan: str | None = None, ) -> PolicyConfig: return { "model_name": model_name, @@ -237,7 +237,7 @@ def test_lm_policy_init(policy_setup): @pytest.fixture def training_setup(request, two_gpu_virtual_cluster): """Setup and teardown specifically for training tests.""" - model_name, tp, cp, cpu_offload, sequence_parallel, activation_checkpointing = ( + model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing = ( request.param ) policy = None @@ -246,7 +246,7 @@ def training_setup(request, two_gpu_virtual_cluster): try: config = create_test_config( - model_name, tp, cp, cpu_offload, sequence_parallel, activation_checkpointing + model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing ) tokenizer = get_tokenizer(config["tokenizer"]) print( @@ -300,8 +300,7 @@ def training_setup(request, two_gpu_virtual_cluster): @pytest.mark.parametrize( "training_setup", [ - # model_name, tp, cp, cpu_offload, sequence_parallel, activation_checkpointing - # Split grid over tp/cp/cpu/sp/act across qwen and llama + # model_name tp cp sp cpu act (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, False, False, False), (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 
1, 1, True, False, False), (TEST_ASSETS.TINY_LLAMA_MODEL_PATH, 1, 1, False, True, False), @@ -317,7 +316,14 @@ def training_setup(request, two_gpu_virtual_cluster): (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, False, True, True), (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 1, True, True, True), (TEST_ASSETS.TINY_QWEN3_MODEL_PATH, 1, 2, False, False, False), - (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, True, True, False), + ( + TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, + 1, + 1, + True, + True, + False, + ), # gemma3 doesn't support spda (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, True, False, True), (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, False, True, True), (TEST_ASSETS.TINY_GEMMA3_MODEL_PATH, 1, 1, True, True, True), @@ -363,7 +369,7 @@ def verify_loss_tensor(loss_tensor): @pytest.fixture def logprob_setup(request, two_gpu_virtual_cluster): """Setup and teardown specifically for training tests.""" - model_name, tp, cp, cpu_offload, sequence_parallel, activation_checkpointing = ( + model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing = ( request.param ) policy = None @@ -371,7 +377,7 @@ def logprob_setup(request, two_gpu_virtual_cluster): try: config = create_test_config( - model_name, tp, cp, cpu_offload, sequence_parallel, activation_checkpointing + model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing ) tokenizer = get_tokenizer(config["tokenizer"]) print( @@ -494,8 +500,9 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan(two_gpu_virtual_clu config = create_test_config( model_name=TEST_ASSETS.TINY_LLAMA_TIED_MODEL_PATH, tp=2, - cpu_offload=False, + cp=1, sequence_parallel=False, + cpu_offload=False, activation_checkpointing=False, custom_parallel_plan=custom_parallel_plan, ) From 75a5a6d318c80addbcbc19b4db960bab1d701d6c Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Mon, 21 Jul 2025 10:15:39 -0700 Subject: [PATCH 52/59] feat: MLFlow Integration for experiment tracking (#697) Signed-off-by: Naveenraj Kamalakannan 
Signed-off-by: Terry Kong Co-authored-by: Naveenraj Kamalakannan --- docs/design-docs/logger.md | 73 ++++- examples/configs/dpo.yaml | 4 + examples/configs/grpo-deepscaler-1.5b-8K.yaml | 4 + examples/configs/grpo_math_1B.yaml | 4 + examples/configs/grpo_math_1B_megatron.yaml | 4 + examples/configs/grpo_sliding_puzzle.yaml | 4 + ...llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml | 1 + ....1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml | 1 + ...po-llama3.1-8b-instruct-4n8g-megatron.yaml | 1 + ...8b-instruct-4n8g-megatrontp2pp2-quick.yaml | 1 + ...llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml | 1 + .../llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml | 1 + ...-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml | 1 + ...3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml | 1 + ...llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml | 1 + ...-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml | 1 + ...en2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml | 1 + ...wen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml | 1 + ...5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml | 1 + ...3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml | 1 + ...ama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml | 1 + ...ft-llama3.1-8b-instruct-1n8g-megatron.yaml | 1 + .../llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml | 3 +- ...wen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml | 1 + examples/configs/sft.yaml | 4 + examples/configs/sft_openmathinstruct2.yaml | 4 + nemo_rl/utils/logger.py | 120 +++++++- pyproject.toml | 3 +- tests/unit/utils/test_logger.py | 282 ++++++++++++++++++ uv.lock | 222 ++++++++++++++ 30 files changed, 731 insertions(+), 17 deletions(-) diff --git a/docs/design-docs/logger.md b/docs/design-docs/logger.md index b13436423b..1c45529540 100644 --- a/docs/design-docs/logger.md +++ b/docs/design-docs/logger.md @@ -1,6 +1,6 @@ # Logger -The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB and Tensorboard. 
+The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB, Tensorboard, and MLflow. ## Requirements @@ -9,12 +9,13 @@ The logger is designed to track key training metrics (including distributed metr * Logging: * WandB * Tensorboard + * MLflow ## Overall Design Since there is a single controller, the single process running the main training loop will gather the metrics and do the logging. -To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger ` and {py:class}`WandbLogger ` will implement: +To handle multiple logger backends, we will have a {py:class}`LoggerInterface ` interface that the {py:class}`TensorboardLogger `, {py:class}`WandbLogger `, and {py:class}`MLflowLogger ` will implement: ```python class LoggerInterface(ABC): @@ -34,10 +35,11 @@ class LoggerInterface(ABC): A {py:class}`Logger ` wrapper class will also implement {py:class}`LoggerInterface ` and maintain a list of loggers to which it delegates writing logs. This will be the main class the user uses in the training loop. 
Usage example: ```python -# Initialize logger with both wandb and tensorboard enabled +# Initialize logger with wandb, tensorboard, and mlflow enabled logging_config = { "wandb_enabled": True, "tensorboard_enabled": False, + "mlflow_enabled": True, "wandb": { "project": "grpo-dev", @@ -46,17 +48,72 @@ logging_config = { "tensorboard": { "log_dir": "logs", }, + "mlflow": { + "experiment_name": "nemo-rl-experiment", + "run_name": "grpo-dev-run", + "tracking_uri": None, # Use local tracking + }, } logger = Logger( cfg=logger_config, ) -# Log metrics, will go to both wandb and tensorboard +# Log metrics, will go to all enabled backends logger.log_metrics({ "loss": 0.123, }, step=10) ``` +## Supported Logging Backends + +The logger supports three main logging backends: + +### WandB (Weights & Biases) +- Provides cloud-based experiment tracking +- Supports custom step metrics for better visualization +- Includes built-in hyperparameter logging +- Offers rich visualization and collaboration features + +### Tensorboard +- Local file-based logging +- Standard TensorBoard visualization +- Supports hyperparameter logging via HParams +- Lightweight and self-contained + +### MLflow +- Comprehensive platform for experiment tracking and model management +- Supports both local and remote tracking servers +- Provides model versioning and artifact management +- Includes a web UI for experiment visualization +- Supports model deployment and serving + +#### MLflow Configuration + +MLflow can be configured with the following parameters: + +```python +mlflow: + experiment_name: "nemo-rl-experiment" # Name of the MLflow experiment + run_name: "my-training-run" # Run name + tracking_uri: "http://localhost:5000" # Optional tracking server URI +``` + + +#### MLflow UI + +After starting training with MLflow enabled, you can view the MLflow UI to monitor your experiments: + +```bash +# Start MLflow UI (run in a separate terminal) +mlflow ui --host 0.0.0.0 --port 5000 +``` + +Then access the 
UI at `http://127.0.0.1:5000/` to view: +- Training runs and experiments +- Metrics (loss, validation metrics, etc.) +- Hyperparameters +- Model artifacts and checkpoints + ## Validation Pretty Logging The logger supports pretty-formatted logging of validation samples to help visualize model outputs during training. This feature is controlled by the `num_val_samples_to_print` configuration parameter. @@ -65,6 +122,7 @@ The logger supports pretty-formatted logging of validation samples to help visua logger: wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false num_val_samples_to_print: 10 ``` @@ -82,9 +140,9 @@ When enabled, the pretty logging will generate formatted text similar to: ## GPU Metric Logging -NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard and/or WandB. +NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard, WandB, and/or MLflow. -This approach allows us to offer the same GPU metric tracking on all loggers (not just Wandb) and simplifies the implementation greatly. +This approach allows us to offer the same GPU metric tracking on all loggers and simplifies the implementation greatly. This feature is enabled with the `monitor_gpus` configuration parameter. The frequency of data collection and flushing to the loggers is controlled by the `gpu_collection_interval` and `gpu_flush_interval` parameters, both specified in seconds. 
@@ -92,6 +150,7 @@ This feature is enabled with the `monitor_gpus` configuration parameter. The fre logger: wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false monitor_gpus: true gpu_monitoring: collection_interval: 10 @@ -103,7 +162,7 @@ While it is feasible to monitor using remote workers, the implementation require * Logs sent back to the driver do not introduce significant overhead. * Metrics remain clear and interpretable, avoiding issues like double counting caused by colocated workers. * Workers can gracefully flush their logs in case of failure. -* Logging behaves consistently across TensorBoard and Wandb. +* Logging behaves consistently across TensorBoard, WandB, and MLflow. * Workers that spawn other workers accurately report the total resource usage of any grandchild workers. Due to these complexities, we opted for a simpler approach: collecting metrics exposed by the Ray metrics server from the driver. diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 110729a966..8a5f5dcf00 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -153,12 +153,16 @@ logger: log_dir: "logs" # Base directory for all logs wandb_enabled: false # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: false + mlflow_enabled: false # Disable MLflow logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "dpo-dev" name: "dpo" tensorboard: log_dir: "tb_logs-dpo-dev" + mlflow: + experiment_name: "dpo-dev" + run_name: "dpo" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/grpo-deepscaler-1.5b-8K.yaml index ce5ed73c17..9efc308a0a 100644 --- a/examples/configs/grpo-deepscaler-1.5b-8K.yaml +++ 
b/examples/configs/grpo-deepscaler-1.5b-8K.yaml @@ -127,11 +127,15 @@ logger: num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" name: "grpo-dev-logger" tensorboard: {} + mlflow: + experiment_name: "grpo-dev" + run_name: "grpo-dev-logger" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index fd944fa9e7..c003eea2e1 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -132,11 +132,15 @@ logger: num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false # Disable MLflow logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" name: "grpo-dev-logger" tensorboard: {} + mlflow: + experiment_name: "grpo-dev" + run_name: "grpo-dev-logger" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index 600dbfc41c..79a579e278 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -153,11 +153,15 @@ logger: num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false # Disable MLflow logging monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or 
tensorboard wandb: project: "grpo-dev" name: "sj_megatron_1B" tensorboard: {} + mlflow: + experiment_name: "grpo-dev" + run_name: "sj_megatron_1B" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/grpo_sliding_puzzle.yaml b/examples/configs/grpo_sliding_puzzle.yaml index 8493bfc40e..aeb6b48da4 100644 --- a/examples/configs/grpo_sliding_puzzle.yaml +++ b/examples/configs/grpo_sliding_puzzle.yaml @@ -52,11 +52,15 @@ logger: num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb_enabled: false tensorboard_enabled: false + mlflow_enabled: false monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" name: "grpo-dev-sliding_puzzle" tensorboard: {} + mlflow: + experiment_name: "grpo-dev" + run_name: "grpo-dev-sliding_puzzle" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml index 368aafd705..8655dede0a 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml @@ -78,6 +78,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml index 91e2fb9569..e8e8f472c0 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml +++ 
b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml @@ -78,6 +78,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml index 6139ce6788..3dbb98006e 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml @@ -111,6 +111,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 733cce01da..082520095e 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -111,6 +111,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml index c12edd2404..afe19bf4ea 100644 --- a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml @@ -78,6 +78,7 @@ logger: log_dir: "logs" wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index 
6bbcd95edd..a2c61ebce9 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -108,6 +108,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml index af4bb6945d..0fe72a150d 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index b854eb7d38..2ad3228001 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index 9f92be089b..3caf0ccdbd 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: 
nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml index 15665e9af8..ae6426e305 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml index 851ed41ef1..e4449ae147 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml index 00a40de4d0..585a8f5d88 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index d3bbc266f2..78bfeee82d 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ 
b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -109,6 +109,7 @@ logger: num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml index 264d8d028d..a35a13533e 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml @@ -58,6 +58,7 @@ logger: log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml index a1d77244c0..608edace8d 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml @@ -58,6 +58,7 @@ logger: log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml index 585ea7cafa..4fdfb5d37b 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml @@ -102,6 +102,7 @@ logger: log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git 
a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml index b958faeb66..8dac5cb980 100644 --- a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml @@ -50,7 +50,7 @@ policy: fused: false data: max_input_seq_length: 1024 - dataset_name: squad + dataset_name: squad add_bos: true add_eos: true add_generation_prompt: false @@ -58,6 +58,7 @@ logger: log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml index bb2e0e1572..bf38f37eb7 100644 --- a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml +++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml @@ -58,6 +58,7 @@ logger: log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt wandb_enabled: true tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true wandb: project: nemo-rl diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 3839d455e2..b14c6304dd 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -133,12 +133,16 @@ logger: log_dir: "logs" # Base directory for all logs wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "sft-dev" name: "sft-dev-${data.dataset_name}" tensorboard: log_dir: "tb_logs-sft-dev-${data.dataset_name}" + mlflow: + experiment_name: "sft-dev" + run_name: "sft-dev-${data.dataset_name}" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in 
seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/examples/configs/sft_openmathinstruct2.yaml b/examples/configs/sft_openmathinstruct2.yaml index 2040bdd5ff..de9fab880a 100644 --- a/examples/configs/sft_openmathinstruct2.yaml +++ b/examples/configs/sft_openmathinstruct2.yaml @@ -71,12 +71,16 @@ logger: log_dir: "logs" # Base directory for all logs wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true + mlflow_enabled: false monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "sft-dev" name: "openmathinstruct-nemorl-1M_train" tensorboard: log_dir: "tb_logs-openmathinstruct-nemorl-1M_train" + mlflow: + experiment_name: "sft-dev" + run_name: "openmathinstruct-nemorl-1M_train" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py index c039ecd939..8339da80e0 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -19,11 +19,13 @@ import os import re import subprocess +import tempfile import threading import time from abc import ABC, abstractmethod -from typing import Any, Callable, Mapping, Optional, TypedDict +from typing import Any, Callable, Mapping, NotRequired, Optional, TypedDict +import mlflow import ray import requests import torch @@ -36,7 +38,6 @@ from rich.logging import RichHandler from rich.panel import Panel from torch.utils.tensorboard import SummaryWriter -from typing_extensions import NotRequired from nemo_rl.data.interfaces import LLMMessageLogType from nemo_rl.distributed.batched_data_dict import BatchedDataDict @@ -54,6 +55,12 @@ class TensorboardConfig(TypedDict): log_dir: NotRequired[str] +class MLflowConfig(TypedDict): + experiment_name: str + run_name: str + tracking_uri: 
NotRequired[str] + + class GPUMonitoringConfig(TypedDict): collection_interval: int | float flush_interval: int | float @@ -63,8 +70,10 @@ class LoggerConfig(TypedDict): log_dir: str wandb_enabled: bool tensorboard_enabled: bool + mlflow_enabled: bool wandb: WandbConfig tensorboard: TensorboardConfig + mlflow: NotRequired[MLflowConfig] monitor_gpus: bool gpu_monitoring: GPUMonitoringConfig @@ -536,7 +545,7 @@ def _collect(self, metrics: bool = False, sku: bool = False) -> dict[str, Any]: unique_metric_addresses[metrics_address] = True # Process each node's metrics - collected_metrics = {} + collected_metrics: dict[str, Any] = {} for node_idx, metric_address in enumerate(unique_metric_addresses): metrics = self._fetch_and_parse_metrics( node_idx, metric_address, parser_fn @@ -611,6 +620,91 @@ def flush(self) -> None: self.metrics_buffer = [] +class MLflowLogger(LoggerInterface): + """MLflow logger backend.""" + + def __init__(self, cfg: MLflowConfig, log_dir: Optional[str] = None): + """Initialize MLflow logger. + + Args: + cfg: MLflow configuration + log_dir: Optional log directory + """ + if cfg["tracking_uri"]: + mlflow.set_tracking_uri(cfg["tracking_uri"]) + + experiment = mlflow.get_experiment_by_name(cfg["experiment_name"]) + if experiment is None: + if log_dir: + mlflow.create_experiment( + name=cfg["experiment_name"], + artifact_location=log_dir, + ) + else: + mlflow.create_experiment(cfg["experiment_name"]) + else: + mlflow.set_experiment(cfg["experiment_name"]) + + # Start run + run_kwargs: dict[str, str] = {} + run_kwargs["run_name"] = cfg["run_name"] + + self.run = mlflow.start_run(**run_kwargs) + print( + f"Initialized MLflowLogger for experiment {cfg['experiment_name']}, " + f"run {cfg['run_name']}" + ) + + def log_metrics( + self, + metrics: dict[str, Any], + step: int, + prefix: Optional[str] = "", + step_metric: Optional[str] = None, + ) -> None: + """Log metrics to MLflow. 
+ + Args: + metrics: Dict of metrics to log + step: Global step value + prefix: Optional prefix for metric names + step_metric: Optional step metric name (ignored in MLflow) + """ + for name, value in metrics.items(): + if prefix: + name = f"{prefix}/{name}" + mlflow.log_metric(name, value, step=step) + + def log_hyperparams(self, params: Mapping[str, Any]) -> None: + """Log hyperparameters to MLflow. + + Args: + params: Dictionary of hyperparameters to log + """ + # MLflow does not support nested dicts + mlflow.log_params(flatten_dict(params)) + + def log_plot(self, figure: plt.Figure, step: int, name: str) -> None: + """Log a plot to MLflow. + + Args: + figure: Matplotlib figure to log + step: Global step value + name: Name of the plot + """ + with tempfile.NamedTemporaryFile(suffix=".png", delete=True) as tmp_file: + figure.savefig(tmp_file.name, format="png", bbox_inches="tight") + mlflow.log_artifact(tmp_file.name, f"plots/{name}") + + def __del__(self) -> None: + """Clean up resources when the logger is destroyed.""" + try: + mlflow.end_run() + except Exception: + # Ignore errors during cleanup + pass + + class Logger(LoggerInterface): """Main logger class that delegates to multiple backend loggers.""" @@ -621,8 +715,10 @@ def __init__(self, cfg: LoggerConfig): cfg: Config dict with the following keys: - wandb_enabled - tensorboard_enabled + - mlflow_enabled - wandb - tensorboard + - mlflow - monitor_gpus - gpu_collection_interval - gpu_flush_interval @@ -647,6 +743,12 @@ def __init__(self, cfg: LoggerConfig): ) self.loggers.append(tensorboard_logger) + if cfg["mlflow_enabled"]: + mlflow_log_dir = os.path.join(self.base_log_dir, "mlflow") + os.makedirs(mlflow_log_dir, exist_ok=True) + mlflow_logger = MLflowLogger(cfg["mlflow"], log_dir=mlflow_log_dir) + self.loggers.append(mlflow_logger) + # Initialize GPU monitoring if requested self.gpu_monitor = None if cfg["monitor_gpus"]: @@ -765,13 +867,15 @@ def log_plot_token_mult_prob_error( return generation_logprob 
= generation_logprobs[ - sample_idx, generation_start_idx:generation_end_idx + sample_idx, int(generation_start_idx) : int(generation_end_idx) ] prev_logprob = ( - prev_logprobs[sample_idx, generation_start_idx:generation_end_idx] - * mask[sample_idx, generation_start_idx:generation_end_idx] + prev_logprobs[ + sample_idx, int(generation_start_idx) : int(generation_end_idx) + ] + * mask[sample_idx, int(generation_start_idx) : int(generation_end_idx)] ) - diff_i = diff[sample_idx, generation_start_idx:generation_end_idx] + diff_i = diff[sample_idx, int(generation_start_idx) : int(generation_end_idx)] # Find max absolute error token max_abs_error_idx = torch.argmax(diff_i).item() @@ -785,7 +889,7 @@ def log_plot_token_mult_prob_error( max_rel_error = relative_error[max_rel_error_idx].item() fig = plt.figure() - step_idx = torch.arange(generation_start_idx, generation_end_idx) + step_idx = torch.arange(int(generation_start_idx), int(generation_end_idx)) plt.plot(step_idx, generation_logprob, label="logprob (inference engine)") plt.plot(step_idx, prev_logprob, label="logprob (reference policy)") diff --git a/pyproject.toml b/pyproject.toml index 945ee9acda..7480d6e590 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,8 @@ dependencies = [ "nvtx", "matplotlib", "plotly", - "google-adk" + "google-adk", + "mlflow", ] [project.optional-dependencies] diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py index c17cb3ae05..e61120f312 100644 --- a/tests/unit/utils/test_logger.py +++ b/tests/unit/utils/test_logger.py @@ -21,6 +21,7 @@ from nemo_rl.utils.logger import ( Logger, + MLflowLogger, RayGpuMonitorLogger, TensorboardLogger, WandbLogger, @@ -262,6 +263,154 @@ def test_log_hyperparams(self, mock_wandb): mock_run.config.update.assert_called_once_with(params) +class TestMLflowLogger: + """Test the MLflowLogger class.""" + + @pytest.fixture + def temp_dir(self): + """Create a temporary directory for logs.""" + temp_dir = tempfile.mkdtemp() 
+ yield temp_dir + shutil.rmtree(temp_dir) + + @patch("nemo_rl.utils.logger.mlflow") + def test_init_basic_config(self, mock_mlflow, temp_dir): + """Test initialization of MLflowLogger with basic config.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + MLflowLogger(cfg, log_dir=temp_dir) + + mock_mlflow.set_experiment.assert_called_once_with("test-experiment") + mock_mlflow.start_run.assert_called_once_with(run_name="test-run") + + @patch("nemo_rl.utils.logger.mlflow") + def test_init_full_config(self, mock_mlflow, temp_dir): + """Test initialization of MLflowLogger with full config.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + } + MLflowLogger(cfg, log_dir=temp_dir) + + mock_mlflow.set_tracking_uri.assert_called_once_with("http://localhost:5000") + mock_mlflow.set_experiment.assert_called_once_with("test-experiment") + mock_mlflow.start_run.assert_called_once_with(run_name="test-run") + + @patch("nemo_rl.utils.logger.mlflow") + def test_log_metrics(self, mock_mlflow, temp_dir): + """Test logging metrics to MLflowLogger.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + logger = MLflowLogger(cfg, log_dir=temp_dir) + + metrics = {"loss": 0.5, "accuracy": 0.8} + step = 10 + logger.log_metrics(metrics, step) + + # Check that log_metric was called for each metric + assert mock_mlflow.log_metric.call_count == 2 + mock_mlflow.log_metric.assert_any_call("loss", 0.5, step=10) + mock_mlflow.log_metric.assert_any_call("accuracy", 0.8, step=10) + + @patch("nemo_rl.utils.logger.mlflow") + def test_log_metrics_with_prefix(self, mock_mlflow, temp_dir): + """Test logging metrics with a prefix to MLflowLogger.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + logger = MLflowLogger(cfg, log_dir=temp_dir) + + metrics = {"loss": 
0.5, "accuracy": 0.8} + step = 10 + prefix = "train" + logger.log_metrics(metrics, step, prefix) + + # Check that log_metric was called for each metric with prefix + assert mock_mlflow.log_metric.call_count == 2 + mock_mlflow.log_metric.assert_any_call("train/loss", 0.5, step=10) + mock_mlflow.log_metric.assert_any_call("train/accuracy", 0.8, step=10) + + @patch("nemo_rl.utils.logger.mlflow") + def test_log_hyperparams(self, mock_mlflow, temp_dir): + """Test logging hyperparameters to MLflowLogger.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + logger = MLflowLogger(cfg, log_dir=temp_dir) + + params = {"lr": 0.001, "batch_size": 32, "model": {"hidden_size": 128}} + logger.log_hyperparams(params) + + # Check that log_params was called with flattened params + mock_mlflow.log_params.assert_called_once_with( + { + "lr": 0.001, + "batch_size": 32, + "model.hidden_size": 128, + } + ) + + @patch("nemo_rl.utils.logger.mlflow") + @patch("nemo_rl.utils.logger.plt") + @patch("nemo_rl.utils.logger.os") + def test_log_plot(self, mock_os, mock_plt, mock_mlflow, temp_dir): + """Test logging plots to MLflowLogger.""" + import tempfile + + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + logger = MLflowLogger(cfg, log_dir=temp_dir) + + # Mock the figure + mock_figure = mock_plt.Figure.return_value + + # Mock tempfile.NamedTemporaryFile + mock_temp_file = type("MockTempFile", (), {"name": "/tmp/test.png"})() + with patch.object(tempfile, "NamedTemporaryFile") as mock_tempfile: + mock_tempfile.return_value.__enter__.return_value = mock_temp_file + mock_tempfile.return_value.__exit__.return_value = None + + logger.log_plot(mock_figure, step=10, name="test_plot") + + # Check that figure was saved and logged as artifact + mock_figure.savefig.assert_called_once_with( + "/tmp/test.png", format="png", bbox_inches="tight" + ) + 
mock_mlflow.log_artifact.assert_called_once_with( + "/tmp/test.png", "plots/test_plot" + ) + + @patch("nemo_rl.utils.logger.mlflow") + def test_cleanup(self, mock_mlflow, temp_dir): + """Test cleanup when logger is destroyed.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": None, + } + logger = MLflowLogger(cfg, log_dir=temp_dir) + + # Trigger cleanup + logger.__del__() + + # Check that end_run was called + mock_mlflow.end_run.assert_called_once() + + class TestRayGpuMonitorLogger: """Test the RayGpuMonitorLogger class.""" @@ -768,6 +917,7 @@ def test_init_with_gpu_monitoring( cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -813,6 +963,7 @@ def test_gpu_monitoring_without_wandb( cfg = { "wandb_enabled": False, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -851,6 +1002,7 @@ def test_gpu_monitoring_no_main_loggers( cfg = { "wandb_enabled": False, "tensorboard_enabled": False, + "mlflow_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -902,6 +1054,7 @@ def test_init_no_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): cfg = { "wandb_enabled": False, "tensorboard_enabled": False, + "mlflow_enabled": False, "monitor_gpus": False, "log_dir": temp_dir, } @@ -918,6 +1071,7 @@ def test_init_wandb_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): cfg = { "wandb_enabled": True, "tensorboard_enabled": False, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "log_dir": temp_dir, @@ -937,6 +1091,7 @@ def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir cfg = { "wandb_enabled": False, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "tensorboard": {"log_dir": "test_logs"}, "log_dir": 
temp_dir, @@ -956,6 +1111,7 @@ def test_init_both_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -979,6 +1135,7 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1005,6 +1162,7 @@ def test_log_hyperparams(self, mock_tb_logger, mock_wandb_logger, temp_dir): cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1033,6 +1191,7 @@ def test_init_with_gpu_monitoring( cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -1077,6 +1236,7 @@ def test_log_metrics_with_prefix_and_step_metric( cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1114,6 +1274,7 @@ def test_log_plot_token_mult_prob_error( cfg = { "wandb_enabled": True, "tensorboard_enabled": True, + "mlflow_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1159,3 +1320,124 @@ def test_log_plot_token_mult_prob_error( legend_texts = [text.get_text() for text in ax.get_legend().get_texts()] assert any("Max abs error" in text for text in legend_texts) assert any("Max rel error (prob)" in text for text in legend_texts) + + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + def test_init_mlflow_only(self, 
mock_tb_logger, mock_wandb_logger, temp_dir): + """Test initialization with only MLflowLogger enabled.""" + cfg = { + "wandb_enabled": False, + "tensorboard_enabled": False, + "mlflow_enabled": True, + "monitor_gpus": False, + "mlflow": { + "experiment_name": "test-experiment", + "tracking_uri": None, + "run_name": "test-run", + }, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + assert len(logger.loggers) == 1 + mock_wandb_logger.assert_not_called() + mock_tb_logger.assert_not_called() + + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.MLflowLogger") + def test_init_all_loggers( + self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + ): + """Test initialization with all loggers enabled.""" + cfg = { + "wandb_enabled": True, + "tensorboard_enabled": True, + "mlflow_enabled": True, + "monitor_gpus": False, + "wandb": {"project": "test-project"}, + "tensorboard": {"log_dir": "test_logs"}, + "mlflow": { + "experiment_name": "test-experiment", + "tracking_uri": None, + "run_name": "test-run", + }, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + assert len(logger.loggers) == 3 + mock_wandb_logger.assert_called_once() + mock_tb_logger.assert_called_once() + mock_mlflow_logger.assert_called_once() + + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.MLflowLogger") + def test_log_metrics_with_mlflow( + self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + ): + """Test logging metrics to all enabled loggers including MLflow.""" + cfg = { + "wandb_enabled": True, + "tensorboard_enabled": True, + "mlflow_enabled": True, + "monitor_gpus": False, + "wandb": {"project": "test-project"}, + "tensorboard": {"log_dir": "test_logs"}, + "mlflow": { + "experiment_name": "test-experiment", + "tracking_uri": None, + "run_name": "test-run", + }, + "log_dir": temp_dir, + } + logger = 
Logger(cfg) + + # Create mock logger instances + mock_wandb_instance = mock_wandb_logger.return_value + mock_tb_instance = mock_tb_logger.return_value + mock_mlflow_instance = mock_mlflow_logger.return_value + + metrics = {"loss": 0.5, "accuracy": 0.8} + step = 10 + logger.log_metrics(metrics, step) + + # Check that log_metrics was called on all loggers + mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_mlflow_instance.log_metrics.assert_called_once_with( + metrics, step, "", None + ) + + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + @patch("nemo_rl.utils.logger.MLflowLogger") + def test_log_hyperparams_with_mlflow( + self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + ): + """Test logging hyperparameters to all enabled loggers including MLflow.""" + cfg = { + "wandb_enabled": True, + "tensorboard_enabled": True, + "mlflow_enabled": True, + "monitor_gpus": False, + "wandb": {"project": "test-project"}, + "tensorboard": {"log_dir": "test_logs"}, + "mlflow": {"experiment_name": "test-experiment"}, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + # Create mock logger instances + mock_wandb_instance = mock_wandb_logger.return_value + mock_tb_instance = mock_tb_logger.return_value + mock_mlflow_instance = mock_mlflow_logger.return_value + + params = {"lr": 0.001, "batch_size": 32} + logger.log_hyperparams(params) + + # Check that log_hyperparams was called on all loggers + mock_wandb_instance.log_hyperparams.assert_called_once_with(params) + mock_tb_instance.log_hyperparams.assert_called_once_with(params) + mock_mlflow_instance.log_hyperparams.assert_called_once_with(params) diff --git a/uv.lock b/uv.lock index cfda06c29d..1e6d6815b2 100644 --- a/uv.lock +++ b/uv.lock @@ -165,6 +165,20 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, ] +[[package]] +name = "alembic" +version = "1.16.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/52/72e791b75c6b1efa803e491f7cbab78e963695e76d4ada05385252927e76/alembic-1.16.4.tar.gz", hash = "sha256:efab6ada0dd0fae2c92060800e0bf5c1dc26af15a10e02fb4babff164b4725e2", size = 1968161, upload-time = "2025-07-10T16:17:20.192Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/62/96b5217b742805236614f05904541000f55422a6060a90d7fd4ce26c172d/alembic-1.16.4-py3-none-any.whl", hash = "sha256:b05e51e8e82efc1abd14ba2af6392897e145930c3e0a2faf2b0da2f7f7fd660d", size = 247026, upload-time = "2025-07-10T16:17:21.845Z" }, +] + [[package]] name = "aniso8601" version = "10.0.1" @@ -732,6 +746,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "databricks-sdk" +version = "0.59.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/d9/b48531b1b2caa3ed559ece34bf2abff2536048bf88447592621daeaec5d5/databricks_sdk-0.59.0.tar.gz", hash = "sha256:f60a27f00ccdf57d8496dd4a2e46ad17bb9557add09a6b2e23d46f29c0bca613", size = 719165, upload-time = "2025-07-17T11:13:57.847Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1b/ac/1d97e438f86c26314227f7b2f0711476db79522a137b60533c5181ae481b/databricks_sdk-0.59.0-py3-none-any.whl", hash = "sha256:2ae4baefd1f7360c8314e2ebdc0a0a6d7e76a88805a65d0415ff73631c1e4c0d", size = 676213, upload-time = "2025-07-17T11:13:56.088Z" }, +] + [[package]] name = "datasets" version = "4.0.0" @@ -1242,6 +1269,42 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, ] +[[package]] +name = "graphene" +version = "3.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "graphql-core" }, + { name = "graphql-relay" }, + { name = "python-dateutil" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/f6/bf62ff950c317ed03e77f3f6ddd7e34aaa98fe89d79ebd660c55343d8054/graphene-3.4.3.tar.gz", hash = "sha256:2a3786948ce75fe7e078443d37f609cbe5bb36ad8d6b828740ad3b95ed1a0aaa", size = 44739, upload-time = "2024-11-09T20:44:25.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/e0/61d8e98007182e6b2aca7cf65904721fb2e4bce0192272ab9cb6f69d8812/graphene-3.4.3-py2.py3-none-any.whl", hash = "sha256:820db6289754c181007a150db1f7fff544b94142b556d12e3ebc777a7bf36c71", size = 114894, upload-time = "2024-11-09T20:44:23.851Z" }, +] + +[[package]] +name = "graphql-core" +version = "3.2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c4/16/7574029da84834349b60ed71614d66ca3afe46e9bf9c7b9562102acb7d4f/graphql_core-3.2.6.tar.gz", hash = "sha256:c08eec22f9e40f0bd61d805907e3b3b1b9a320bc606e23dc145eebca07c8fbab", size = 505353, upload-time = "2025-01-26T16:36:27.374Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ae/4f/7297663840621022bc73c22d7d9d80dbc78b4db6297f764b545cd5dd462d/graphql_core-3.2.6-py3-none-any.whl", hash = "sha256:78b016718c161a6fb20a7d97bbf107f331cd1afe53e45566c59f776ed7f0b45f", size = 203416, upload-time = "2025-01-26T16:36:24.868Z" }, +] + +[[package]] +name = "graphql-relay" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "graphql-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/13/98fbf8d67552f102488ffc16c6f559ce71ea15f6294728d33928ab5ff14d/graphql-relay-3.2.0.tar.gz", hash = "sha256:1ff1c51298356e481a0be009ccdff249832ce53f30559c1338f22a0e0d17250c", size = 50027, upload-time = "2022-04-16T11:03:45.447Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/16/a4cf06adbc711bd364a73ce043b0b08d8fa5aae3df11b6ee4248bcdad2e0/graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5", size = 16940, upload-time = "2022-04-16T11:03:43.895Z" }, +] + [[package]] name = "graphviz" version = "0.21" @@ -1251,6 +1314,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, ] +[[package]] +name = "greenlet" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752, upload-time = "2025-06-05T16:16:09.955Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, + { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, + { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = "2025-06-05T16:13:04.628Z" }, + { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163", size = 1149055, upload-time = "2025-06-05T16:12:40.457Z" }, + { url = "https://files.pythonhosted.org/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849", size = 297817, upload-time = "2025-06-05T16:29:49.244Z" }, + { url = "https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, + { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, + { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, + { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, + { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, + { url = "https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, + { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121, upload-time = "2025-06-05T16:12:42.527Z" }, + { url = "https://files.pythonhosted.org/packages/27/1a/199f9587e8cb08a0658f9c30f3799244307614148ffe8b1e3aa22f324dea/greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3", size = 297603, upload-time = "2025-06-05T16:20:12.651Z" }, + { url = 
"https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, + { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, + { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, + { url = "https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, + { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, + { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, +] + [[package]] name = "grpcio" version = "1.73.0" @@ -1279,6 +1375,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/35/347db7d2e7674b621afd21b12022e7f48c7b0861b5577134b4e939536141/grpcio-1.73.0-cp313-cp313-win_amd64.whl", hash = "sha256:38cf518cc54cd0c47c9539cefa8888549fcc067db0b0c66a46535ca8032020c4", size = 4335872, upload-time = "2025-06-09T10:04:29.032Z" }, ] +[[package]] +name = "gunicorn" +version = "23.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1889,6 +1997,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606, upload-time = "2025-04-23T01:47:39.028Z" }, ] +[[package]] +name = "mako" +version = "1.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, +] + [[package]] name = "markdown" version = "3.8.2" @@ -2106,6 +2226,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/45/c1a1ccfdd02bc4173ca0f4a2d327683a27df85797b885eb1da1ca325b85c/ml_dtypes-0.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d13755f8e8445b3870114e5b6240facaa7cb0c3361e54beba3e07fa912a6e12b", size = 5052731, upload-time = "2025-01-07T03:34:45.308Z" }, ] +[[package]] +name = "mlflow" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "alembic" }, + { name = "docker" }, + { name = "flask" }, + { name = "graphene" }, + { name = "gunicorn", marker = "sys_platform != 'win32'" }, + { name = "matplotlib" }, + { name = "mlflow-skinny" }, + { name = "numpy" }, + { name = "pandas" }, + { name = "pyarrow" }, + { name = "scikit-learn" }, + { name = "scipy" }, + { name = "sqlalchemy" }, + { name = "waitress", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/e1/0cba7a8fc2c81078b4d31948f65fb1580cee1831e955a86028159724d057/mlflow-3.1.1.tar.gz", hash = "sha256:ee98fe929d61625b72ae5010fbf12a7c6d15527790397827191fd6e8246c33e5", size = 24098836, upload-time = "2025-06-25T09:12:56.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/07/9f28e7e2b1c9552e64e6161cd3943b02349f8164176cea6b75e69d7df94a/mlflow-3.1.1-py3-none-any.whl", hash = 
"sha256:16853335292217fde203a645fd50f38d5567ce7818587ed5236040418918872e", size = 24673365, upload-time = "2025-06-25T09:12:53.482Z" }, +] + +[[package]] +name = "mlflow-skinny" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "cloudpickle" }, + { name = "databricks-sdk" }, + { name = "fastapi" }, + { name = "gitpython" }, + { name = "importlib-metadata" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sqlparse" }, + { name = "typing-extensions" }, + { name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/52/e63c0244a24ed23b5f82b30efffce150c19f126b8ef977b78a56f6d192c9/mlflow_skinny-3.1.1.tar.gz", hash = "sha256:9c2ea510eef6c115c7241305b65f7090d7fdc02399de2a6e8ddae5f285bb7a99", size = 1603411, upload-time = "2025-06-25T05:52:22.717Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/45/24d553e0f550f82aaadd8b9d08f1410a3d750c51733a5f43fcc6def1be00/mlflow_skinny-3.1.1-py3-none-any.whl", hash = "sha256:73b1be5d0ef3099c2d0e5ec3ca7fd0b85d4a6def7d7ab35feda9f06bf8bf7049", size = 1926660, upload-time = "2025-06-25T05:52:20.556Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2311,6 +2484,7 @@ dependencies = [ { name = "hydra-core" }, { name = "math-verify" }, { name = "matplotlib" }, + { name = "mlflow" }, { name = "ninja" }, { name = "numpy" }, { name = "nvidia-ml-py" }, @@ -2390,6 +2564,7 @@ requires-dist = [ { name = "math-verify" }, { name = "matplotlib" }, { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, + { name = "mlflow" }, { name = "nemo-tron", marker = "extra == 'mcore'", editable = "3rdparty/NeMo-workspace" }, { name = "ninja" }, { name = "numpy" }, @@ -4527,6 +4702,44 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, ] +[[package]] +name = "sqlalchemy" +version = "2.0.41" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645, upload-time = "2025-05-14T17:55:24.854Z" }, + { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399, upload-time = "2025-05-14T17:55:28.097Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269, upload-time = "2025-05-14T17:50:38.227Z" }, + { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364, upload-time = "2025-05-14T17:51:49.829Z" }, + { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072, upload-time = "2025-05-14T17:50:39.774Z" }, + { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074, upload-time = "2025-05-14T17:51:51.736Z" }, + { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514, upload-time = "2025-05-14T17:55:49.915Z" }, + { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557, upload-time = "2025-05-14T17:55:51.349Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" }, + { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" }, + { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" }, + { url = "https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" }, + { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420, upload-time = "2025-05-14T17:55:52.69Z" }, + { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329, upload-time = "2025-05-14T17:55:54.495Z" }, + { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, +] + +[[package]] +name = "sqlparse" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/40/edede8dd6977b0d3da179a342c198ed100dd2aba4be081861ee5911e4da4/sqlparse-0.5.3.tar.gz", hash = "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272", size = 84999, upload-time = "2024-12-10T12:05:30.728Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415, upload-time = "2024-12-10T12:05:27.824Z" }, +] + [[package]] name = "starlette" version = "0.46.2" @@ -5145,6 +5358,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/08/cb88fd52c08df57ccc4f722241150643d521b3174f8d0c3a1ec5549c3927/vllm-0.9.0-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:a130715cc915377f78e84088fc35c426266e278a0793be0b2ad78deda2e2f55e", size = 377192911, upload-time = "2025-05-28T01:30:28.547Z" }, ] 
+[[package]] +name = "waitress" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/cb/04ddb054f45faa306a230769e868c28b8065ea196891f09004ebace5b184/waitress-3.0.2.tar.gz", hash = "sha256:682aaaf2af0c44ada4abfb70ded36393f0e307f4ab9456a215ce0020baefc31f", size = 179901, upload-time = "2024-11-16T20:02:35.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/57/a27182528c90ef38d82b636a11f606b0cbb0e17588ed205435f8affe3368/waitress-3.0.2-py3-none-any.whl", hash = "sha256:c56d67fd6e87c2ee598b76abdd4e96cfad1f24cacdea5078d382b1f9d7b5ed2e", size = 56232, upload-time = "2024-11-16T20:02:33.858Z" }, +] + [[package]] name = "wandb" version = "0.20.1" From 022647b961bb2cb32fc7ef341d4f96691ef9c3a5 Mon Sep 17 00:00:00 2001 From: Yi-Fu Wu Date: Mon, 21 Jul 2025 13:37:22 -0700 Subject: [PATCH 53/59] fix: Fix activation checkpointing for mcore path (#703) Signed-off-by: Yi-Fu Wu Signed-off-by: Jialei Chen --- nemo_rl/models/policy/megatron_policy_worker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 0bf4e71477..b0d544aabb 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -496,9 +496,9 @@ def __init__( model_cfg.pipeline_dtype = dtype_map[self.cfg["megatron_cfg"]["pipeline_dtype"]] model_cfg.parallel_output = True if self.cfg["megatron_cfg"]["activation_checkpointing"]: - model_cfg.activations_checkpoint_granularity = "full" - model_cfg.activations_checkpoint_method = "uniform" - model_cfg.activations_checkpoint_num_layers = 1 + model_cfg.recompute_granularity = "full" + model_cfg.recompute_method = "uniform" + model_cfg.recompute_num_layers = 1 if not model_cfg.gated_linear_unit: assert model_cfg.activation_func is not None, ( "activation_func must be set if not using gated_linear_unit. 
This likely " From 163d750c24122753dc5310ba7ba41116656c88b2 Mon Sep 17 00:00:00 2001 From: Sahil Jain <48468750+SahilJain314@users.noreply.github.com> Date: Tue, 22 Jul 2025 09:22:24 -0700 Subject: [PATCH 54/59] feat: Enable Context Parallelism and Sequence Packing for MCore and Dtensor (#704) Signed-off-by: Sahil Jain Signed-off-by: Terry Kong Signed-off-by: Jonas yang Signed-off-by: Yuki Huang Co-authored-by: Terry Kong Co-authored-by: Jonas Yang CN Co-authored-by: yuki <48991475+yuki-666@users.noreply.github.com> Co-authored-by: Ahmad Kiswani Co-authored-by: Terry Kong Co-authored-by: Yuki Huang Signed-off-by: Jialei Chen --- docs/model-quirks.md | 5 + examples/configs/dpo.yaml | 3 + examples/configs/grpo-deepscaler-1.5b-8K.yaml | 3 + .../configs/grpo_deepscaler-1.5b-24K.yaml | 3 + examples/configs/grpo_math_1B.yaml | 10 + examples/configs/grpo_math_1B_megatron.yaml | 9 +- examples/configs/grpo_math_8B_megatron.yaml | 2 +- ...llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml | 5 +- ....1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml | 5 +- ...po-llama3.1-8b-instruct-4n8g-megatron.yaml | 5 +- ...8b-instruct-4n8g-megatrontp2pp2-quick.yaml | 5 +- ...llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml | 5 +- .../llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml | 2 + ...-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml | 2 + ...3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml | 2 + ...llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml | 2 + ...-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml | 2 + ...en2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml | 2 + ...wen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml | 2 + ...5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml | 2 + ...3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml | 4 +- ...ama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml | 4 +- ...ft-llama3.1-8b-instruct-1n8g-megatron.yaml | 4 +- .../llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml | 4 +- ...wen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml | 4 +- examples/configs/sft.yaml | 8 +- examples/configs/sft_openmathinstruct2.yaml | 3 + examples/run_sft.py | 2 + 
nemo_rl/algorithms/loss_functions.py | 95 +++ nemo_rl/data/packing/__init__.py | 35 + nemo_rl/data/packing/algorithms.py | 571 ++++++++++++++ nemo_rl/data/packing/metrics.py | 249 ++++++ nemo_rl/distributed/batched_data_dict.py | 189 ++++- nemo_rl/distributed/model_utils.py | 297 +++++++- nemo_rl/models/dtensor/parallelize.py | 8 +- nemo_rl/models/huggingface/common.py | 253 +++++++ nemo_rl/models/megatron/common.py | 320 +++++++- .../models/policy/dtensor_policy_worker.py | 367 +++++++-- nemo_rl/models/policy/lm_policy.py | 121 ++- .../models/policy/megatron_policy_worker.py | 136 +++- pyproject.toml | 1 - tests/functional/test_converter_roundtrip.py | 1 + tests/unit/algorithms/__init__.py | 13 + tests/unit/algorithms/test_grpo.py | 14 +- .../test_sequence_packing_gradients.py | 449 +++++++++++ tests/unit/data/packing/__init__.py | 15 + tests/unit/data/packing/test_algorithms.py | 326 ++++++++ .../distributed/test_batched_data_dict.py | 370 ++++++++- tests/unit/distributed/test_model_utils.py | 424 +++++++++++ .../models/generation/test_vllm_generation.py | 6 + tests/unit/models/megatron/__init__.py | 13 + tests/unit/models/megatron/test_common.py | 707 ++++++++++++++++++ .../unit/models/policy/test_dtensor_worker.py | 3 + .../models/policy/test_megatron_worker.py | 413 ++++++++++ tests/unit/test_utils.py | 2 + tests/unit/utils/test_native_checkpoint.py | 3 + 56 files changed, 5322 insertions(+), 188 deletions(-) create mode 100644 nemo_rl/data/packing/__init__.py create mode 100644 nemo_rl/data/packing/algorithms.py create mode 100644 nemo_rl/data/packing/metrics.py create mode 100644 tests/unit/algorithms/test_sequence_packing_gradients.py create mode 100644 tests/unit/data/packing/__init__.py create mode 100644 tests/unit/data/packing/test_algorithms.py create mode 100644 tests/unit/distributed/test_model_utils.py create mode 100644 tests/unit/models/megatron/__init__.py create mode 100644 tests/unit/models/megatron/test_common.py diff --git 
a/docs/model-quirks.md b/docs/model-quirks.md index ec37048469..31827af86d 100644 --- a/docs/model-quirks.md +++ b/docs/model-quirks.md @@ -33,6 +33,11 @@ NeMo-RL uses the vLLM V1 runtime for both synchronous and asynchronous inference - NeMo-RL implemented this feature based on torch CP [implementation](https://github.com/pytorch/pytorch/blob/main/torch/distributed/tensor/experimental/_attention.py). And we inherit its limitations. Whether model level support CP only depends on arguments passed to `torch.nn.functional.scaled_dot_product_attention`. Current NeMo-RL passed all ones attention mask to `model.forward`. For Gemma-3, it won't ignore attention mask as result `attn_bias` is not None which is not supported by torch CP. Please see [assertion](https://github.com/pytorch/pytorch/blob/134179474539648ba7dee1317959529fbd0e7f89/torch/distributed/tensor/experimental/_attention.py#L262) . + - Context parallel can't be used together with sequence packing. Sequence packing requires `attn_implementation="flash_attention_2"`, this conflict with context parallel requires SDPA impl. Refer to [here](https://github.com/huggingface/transformers/blob/bda75b4011239d065de84aa3e744b67ebfa7b245/src/transformers/modeling_utils.py#L2317) for more details. + + +- It's a known issue that context parallel can't be used together with sequence parallel. +Refer to [here](https://github.com/NVIDIA-NeMo/RL/issues/659) for more details. - It's a known issue that context parallel can't be used together with sequence parallel. Refer to [here](https://github.com/NVIDIA-NeMo/RL/issues/659) for more details. 
diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 8a5f5dcf00..bcbffb0761 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -57,6 +57,9 @@ policy: dynamic_batching: enabled: false + sequence_packing: + enabled: false + # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} diff --git a/examples/configs/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/grpo-deepscaler-1.5b-8K.yaml index 9efc308a0a..08d021f582 100644 --- a/examples/configs/grpo-deepscaler-1.5b-8K.yaml +++ b/examples/configs/grpo-deepscaler-1.5b-8K.yaml @@ -55,6 +55,9 @@ policy: dynamic_batching: enabled: False + sequence_packing: + enabled: False + # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} diff --git a/examples/configs/grpo_deepscaler-1.5b-24K.yaml b/examples/configs/grpo_deepscaler-1.5b-24K.yaml index f2552eea7e..dc9db4ceab 100644 --- a/examples/configs/grpo_deepscaler-1.5b-24K.yaml +++ b/examples/configs/grpo_deepscaler-1.5b-24K.yaml @@ -21,6 +21,9 @@ policy: dynamic_batching: enabled: False + sequence_packing: + enabled: False + optimizer: name: "torch.optim.AdamW" kwargs: diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index c003eea2e1..a388f7b2cc 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -51,6 +51,9 @@ policy: tensor_parallel_size: 1 context_parallel_size: 1 custom_parallel_plan: null + + megatron_cfg: + enabled: false # dynamic_batching improves performance by ensuring logprob and training microbatches # have a sufficent number of tokens to maximize GPU utilization. 
Specifically, variable length @@ -58,9 +61,16 @@ policy: # amount of tokens is approximately close to 'train_mb_tokens' and 'logprob_mb_tokens' for the # training and logprob stages respectively. dynamic_batching: + enabled: False + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 + + sequence_packing: enabled: True train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" sequence_length_round: 64 # makes the training sequence length divisible by the tensor parallel size diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index 79a579e278..d58eb47aae 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -49,14 +49,19 @@ policy: # responses are sorted by sequence length and bucketed into microbatches with a total # amount of tokens is approximately close to 'train_mb_tokens' and 'logprob_mb_tokens' for the # training and logprob stages respectively. + # + # We disable it for Megatron as it is incompatible with Pipeline parallelism. 
Instead, we use sequence packing dynamic_batching: enabled: False + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 sequence_packing: - enabled: False # coming soon + enabled: True train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - algorithm: "modified_ffd" + algorithm: "modified_first_fit_decreasing" sequence_length_round: 64 max_grad_norm: 1.0 diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index ef0e932b0c..004bc738b0 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -72,4 +72,4 @@ policy: cluster: gpus_per_node: 8 - num_nodes: 1 \ No newline at end of file + num_nodes: 1 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml index 8655dede0a..b060004882 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml @@ -43,7 +43,10 @@ policy: custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} max_grad_norm: 1.0 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml index e8e8f472c0..c34771595b 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml +++ 
b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml @@ -43,7 +43,10 @@ policy: custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} max_grad_norm: 1.0 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml index 3dbb98006e..abc42f30eb 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml @@ -37,7 +37,10 @@ policy: enabled: false dynamic_batching: - enabled: False + enabled: false + + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} max_grad_norm: 1.0 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 082520095e..a571f32582 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -37,7 +37,10 @@ policy: enabled: false dynamic_batching: - enabled: False + enabled: false + + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} max_grad_norm: 1.0 diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml index afe19bf4ea..832d989b59 100644 --- a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml @@ -44,7 +44,10 @@ policy: custom_parallel_plan: null dynamic_batching: - 
enabled: False + enabled: false + + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} max_grad_norm: 1.0 diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index a2c61ebce9..b503afad4b 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml index 0fe72a150d..ea3188b9ae 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml @@ -50,6 +50,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 8 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index 2ad3228001..d29b88c4e0 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -49,6 +49,8 @@ policy: 
train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index 3caf0ccdbd..355cd3a5d3 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml index ae6426e305..0ce93de5ae 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 8 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml index e4449ae147..45788b3172 100644 --- 
a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 8 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml index 585a8f5d88..ae0add9bd2 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 4 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index 78bfeee82d..cce3f5b327 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -49,6 +49,8 @@ policy: train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} sequence_length_round: 64 + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git 
a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml index a35a13533e..50aa3b96c6 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml @@ -34,7 +34,9 @@ policy: context_parallel_size: 1 custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml index 608edace8d..7a774c3654 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml @@ -34,7 +34,9 @@ policy: context_parallel_size: 1 custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + sequence_packing: + enabled: false make_sequence_length_divisible_by: 2 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml index 4fdfb5d37b..14c2f9692e 100644 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml @@ -28,7 +28,9 @@ policy: dtensor_cfg: enabled: false dynamic_batching: - enabled: False + enabled: false + sequence_packing: + enabled: false make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} max_grad_norm: 1 optimizer: null diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml index 
8dac5cb980..617ce45096 100644 --- a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml @@ -34,7 +34,9 @@ policy: context_parallel_size: 1 custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + sequence_packing: + enabled: false make_sequence_length_divisible_by: 1 max_grad_norm: 1 optimizer: diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml index bf38f37eb7..6761e2f015 100644 --- a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml +++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml @@ -34,7 +34,9 @@ policy: context_parallel_size: 1 custom_parallel_plan: null dynamic_batching: - enabled: False + enabled: false + sequence_packing: + enabled: false make_sequence_length_divisible_by: 8 max_grad_norm: 1 optimizer: diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index b14c6304dd..8a8b7d7129 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -44,6 +44,12 @@ policy: dynamic_batching: enabled: false + sequence_packing: + enabled: False + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} @@ -121,7 +127,7 @@ policy: average_in_collective: true data_parallel_sharding_strategy: "optim_grads_params" - + data: max_input_seq_length: ${policy.max_total_sequence_length} dataset_name: "squad" diff --git a/examples/configs/sft_openmathinstruct2.yaml b/examples/configs/sft_openmathinstruct2.yaml index de9fab880a..aa128e5a99 100644 --- 
a/examples/configs/sft_openmathinstruct2.yaml +++ b/examples/configs/sft_openmathinstruct2.yaml @@ -40,6 +40,9 @@ policy: dynamic_batching: enabled: false + sequence_packing: + enabled: false + # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} diff --git a/examples/run_sft.py b/examples/run_sft.py index ce5b258b0c..df0d7ce3f7 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -31,6 +31,8 @@ from nemo_rl.utils.config import load_config, parse_hydra_overrides from nemo_rl.utils.logger import get_next_experiment_dir +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + def parse_args(): """Parse command line arguments.""" diff --git a/nemo_rl/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py index 1bf472d830..923e836554 100644 --- a/nemo_rl/algorithms/loss_functions.py +++ b/nemo_rl/algorithms/loss_functions.py @@ -114,6 +114,7 @@ def __call__( global_valid_toks: torch.Tensor, vocab_parallel_rank: Optional[int] = None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, ) -> tuple[torch.Tensor, dict]: """Clipped Policy Gradient RL loss function.""" token_mask = data["token_mask"][:, 1:] @@ -149,7 +150,10 @@ def __call__( vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], tp_group=vocab_parallel_group, inference_only=False, + cp_group=context_parallel_group, ) + # slice off to the correct length to remove potential CP padding + curr_logprobs = curr_logprobs[:, : data["input_ids"].shape[1] - 1] elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): curr_logprobs = get_logprobs_from_vocab_parallel_logits( next_token_logits, data["input_ids"], seq_index=seq_index @@ -312,6 +316,7 @@ def __call__( global_valid_toks: Tensor, vocab_parallel_rank: Optional[int] = 
None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, dpo_loss: bool = False, dpo_average_log_probs: bool = False, ) -> tuple[torch.Tensor, dict[str, Any]]: @@ -335,7 +340,10 @@ def __call__( vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], tp_group=vocab_parallel_group, inference_only=False, + cp_group=context_parallel_group, ) + # slice off to the correct length to remove potential CP padding + token_logprobs = token_logprobs[:, : data["input_ids"].shape[1] - 1] elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): token_logprobs = get_logprobs_from_vocab_parallel_logits( next_token_logits, data["input_ids"] @@ -466,6 +474,7 @@ def _preference_loss( global_valid_seqs: Tensor, vocab_parallel_rank: Optional[int] = None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, ) -> tuple[Tensor, Tensor, Tensor, Tensor]: ## TODO(@ashors): there's some duplicate code here with the NLLLoss function. 
We should refactor token_mask = data["token_mask"][:, 1:] @@ -483,7 +492,10 @@ def _preference_loss( vocab_end_index=(vocab_parallel_rank + 1) * next_token_logits.shape[-1], tp_group=vocab_parallel_group, inference_only=False, + cp_group=context_parallel_group, ) + # slice off to the correct length to remove potential CP padding + token_logprobs = token_logprobs[:, : data["input_ids"].shape[1] - 1] elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): token_logprobs = get_logprobs_from_vocab_parallel_logits( next_token_logits, data["input_ids"] @@ -548,6 +560,7 @@ def __call__( global_valid_toks: Tensor | None, vocab_parallel_rank: Optional[int] = None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, ) -> tuple[torch.Tensor, dict[str, Any]]: sft_loss_chosen = torch.tensor(0.0) if self.sft_loss_weight > 0: @@ -561,6 +574,7 @@ def __call__( global_valid_toks=global_valid_toks, ## unused because sft loss returned is at the sample level vocab_parallel_rank=vocab_parallel_rank, vocab_parallel_group=vocab_parallel_group, + context_parallel_group=context_parallel_group, dpo_loss=True, dpo_average_log_probs=self.sft_average_log_probs, ) @@ -582,6 +596,7 @@ def __call__( global_valid_seqs, vocab_parallel_rank=vocab_parallel_rank, vocab_parallel_group=vocab_parallel_group, + context_parallel_group=context_parallel_group, ) dpo_loss = ( @@ -601,3 +616,83 @@ def __call__( "rewards_rejected_mean": rewards_rejected_mean.item(), "num_valid_samples": num_valid_samples.item(), } + + +class SequencePackingLossWrapper: + def __init__( + self, + loss_fn: LossFunction, + cu_seqlens_q: Tensor, + cu_seqlens_q_padded: Optional[Tensor] = None, + ): + self.loss_fn = loss_fn + self.cu_seqlens_q = cu_seqlens_q + self.cu_seqlens_q_padded = cu_seqlens_q_padded + + def __call__( + self, + next_token_logits: Tensor, + data: BatchedDataDict[Any], + global_valid_seqs: Tensor | None, + 
global_valid_toks: Tensor | None, + vocab_parallel_rank: Optional[int] = None, + vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + ) -> tuple[Tensor, dict[str, Any]]: + """Wraps a loss function to handle sequence packing by doing one sequence at a time to avoid excessive padding.""" + unpadded_cu_seqlens = self.cu_seqlens_q + unpadded_seq_lengths = self.cu_seqlens_q[1:] - self.cu_seqlens_q[:-1] + if self.cu_seqlens_q_padded is not None: + padded_cu_seqlens = self.cu_seqlens_q_padded + padded_seq_lengths = ( + self.cu_seqlens_q_padded[1:] - self.cu_seqlens_q_padded[:-1] + ) + else: + padded_cu_seqlens = unpadded_cu_seqlens + padded_seq_lengths = unpadded_seq_lengths + seq_starts = padded_cu_seqlens[:-1] + seq_ends = padded_cu_seqlens[1:] + + loss_accum = 0 + metrics_accum = {} + for seq_idx in range(len(seq_starts)): + seq_start = seq_starts[seq_idx].item() + seq_end = seq_ends[seq_idx].item() + + # get sequence and unpad all 'data' tensors. 
The data dict is a BatchedDataDict of unpacked tensors + seq_data = data.slice(seq_idx, seq_idx + 1) + unpadded_seq_data = {} + for k, v in seq_data.items(): + if isinstance(v, torch.Tensor) and v.ndim > 1 and v.shape[1] > 1: + unpadded_seq_data[k] = v[:, : unpadded_seq_lengths[seq_idx]] + else: + unpadded_seq_data[k] = v + + # get next_token_logits + cp_size = ( + 1 + if context_parallel_group is None + else torch.distributed.get_world_size(context_parallel_group) + ) + logit_slice_idxs = slice( + seq_start // cp_size, + (seq_start + padded_seq_lengths[seq_idx]) // cp_size, + ) + next_token_logits_slice = next_token_logits[:, logit_slice_idxs, :] + + loss, metrics = self.loss_fn( + next_token_logits_slice, + unpadded_seq_data, + global_valid_seqs, + global_valid_toks, + vocab_parallel_rank=vocab_parallel_rank, + vocab_parallel_group=vocab_parallel_group, + context_parallel_group=context_parallel_group, + ) + loss_accum += loss + for k, v in metrics.items(): + if k not in metrics_accum: + metrics_accum[k] = 0 + metrics_accum[k] += v + + return loss_accum, metrics_accum diff --git a/nemo_rl/data/packing/__init__.py b/nemo_rl/data/packing/__init__.py new file mode 100644 index 0000000000..a955f681cc --- /dev/null +++ b/nemo_rl/data/packing/__init__.py @@ -0,0 +1,35 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from nemo_rl.data.packing.algorithms import ( + ConcatenativePacker, + FirstFitDecreasingPacker, + FirstFitShufflePacker, + ModifiedFirstFitDecreasingPacker, + PackingAlgorithm, + SequencePacker, + get_packer, +) +from nemo_rl.data.packing.metrics import PackingMetrics + +__all__ = [ + "PackingAlgorithm", + "SequencePacker", + "ConcatenativePacker", + "FirstFitDecreasingPacker", + "FirstFitShufflePacker", + "ModifiedFirstFitDecreasingPacker", + "get_packer", + "PackingMetrics", +] diff --git a/nemo_rl/data/packing/algorithms.py b/nemo_rl/data/packing/algorithms.py new file mode 100644 index 0000000000..71e643f2b7 --- /dev/null +++ b/nemo_rl/data/packing/algorithms.py @@ -0,0 +1,571 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Sequence packing algorithms for efficient batching of variable-length sequences.""" + +import enum +import math +import random +from abc import ABC, abstractmethod +from typing import Dict, List, Tuple, Type, Union + + +class PackingAlgorithm(enum.Enum): + """Enum for supported sequence packing algorithms.""" + + CONCATENATIVE = "concatenative" + FIRST_FIT_DECREASING = "first_fit_decreasing" + FIRST_FIT_SHUFFLE = "first_fit_shuffle" + MODIFIED_FIRST_FIT_DECREASING = "modified_first_fit_decreasing" + + +class SequencePacker(ABC): + """Abstract base class for sequence packing algorithms. 
+ + Sequence packing is the process of efficiently arranging sequences of different + lengths into fixed-capacity bins (batches) to maximize computational efficiency. + """ + + def __init__(self, bin_capacity: int, collect_metrics: bool = False): + """Initialize the sequence packer. + + Args: + bin_capacity: The maximum capacity of each bin. + collect_metrics: Whether to collect metrics across multiple packing operations. + """ + self.bin_capacity = bin_capacity + self.collect_metrics = collect_metrics + self.metrics = None + + if collect_metrics: + from nemo_rl.data.packing.metrics import PackingMetrics + + self.metrics = PackingMetrics() + + @abstractmethod + def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: + """Implementation of the packing algorithm. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of bins, where each bin is a list of indices into the original + sequence_lengths list. + """ + pass + + def pack(self, sequence_lengths: List[int]) -> List[List[int]]: + """Pack sequences into bins and update metrics if enabled. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of bins, where each bin is a list of indices into the original + sequence_lengths list. + """ + # Call the implementation + bins = self._pack_implementation(sequence_lengths) + + # Update metrics if collection is enabled + if self.collect_metrics and self.metrics: + self.metrics.update(sequence_lengths, bins, self.bin_capacity) + + return bins + + def reset_metrics(self) -> None: + """Reset collected metrics.""" + if self.metrics: + self.metrics.reset() + + def compute_metrics( + self, sequence_lengths: List[int], bins: List[List[int]] + ) -> Dict[str, float]: + """Calculate metrics for a packing solution without updating the metrics tracker. 
+ + Args: + sequence_lengths: List of sequence lengths + bins: List of bins, where each bin is a list of indices + + Returns: + Dictionary of packing metrics + """ + if self.metrics: + return self.metrics.calculate_stats_only( + sequence_lengths, bins, self.bin_capacity + ) + else: + # Create a temporary metrics object if not collecting + from nemo_rl.data.packing.metrics import PackingMetrics + + temp_metrics = PackingMetrics() + return temp_metrics.calculate_stats_only( + sequence_lengths, bins, self.bin_capacity + ) + + def get_aggregated_metrics(self) -> Dict[str, float]: + """Get aggregated metrics across all packing operations. + + Returns: + Dictionary of aggregated metrics, or empty dict if not collecting + """ + if self.metrics: + return self.metrics.get_aggregated_stats() + else: + return {} + + def print_metrics(self) -> None: + """Print the current metrics in a formatted way.""" + if not self.metrics: + print( + "Metrics collection is not enabled. Initialize with collect_metrics=True." + ) + return + + self.metrics.print_aggregated_stats() + + def _validate_sequence_lengths(self, sequence_lengths: List[int]) -> None: + """Validate that all sequence lengths are within bin capacity. + + Args: + sequence_lengths: A list of sequence lengths to validate. + + Raises: + ValueError: If any sequence length exceeds bin capacity. + """ + for length in sequence_lengths: + if length > self.bin_capacity: + raise ValueError( + f"Sequence length {length} exceeds bin capacity {self.bin_capacity}" + ) + + def _create_indexed_lengths( + self, sequence_lengths: List[int], reverse: bool = False + ) -> List[Tuple[int, int]]: + """Create a list of (length, index) pairs from sequence lengths. + + Args: + sequence_lengths: A list of sequence lengths. + reverse: Whether to sort in descending order (True) or ascending order (False). + + Returns: + A list of (length, index) pairs, optionally sorted. 
+ """ + indexed_lengths = [(length, i) for i, length in enumerate(sequence_lengths)] + if reverse: + indexed_lengths.sort(reverse=True) # Sort in descending order + return indexed_lengths + + def _estimate_bins_needed(self, sequence_lengths: List[int]) -> int: + """Estimate the number of bins needed based on total length. + + Args: + sequence_lengths: A list of sequence lengths. + + Returns: + Estimated number of bins needed. + """ + total_length = sum(sequence_lengths) + return max(1, math.ceil(total_length / self.bin_capacity)) + + +class ConcatenativePacker(SequencePacker): + """Concatenative packing algorithm. + + This algorithm simply concatenates sequences in order until reaching the bin capacity, + then starts a new bin. It doesn't try to optimize the packing in any way. + + Time complexity: O(n) where n is the number of sequences. + + Example: + ```python + >>> examples = { + ... "sequence_lengths": [4, 1, 3, 2, 1, 3, 4, 5] + ... } + >>> # If packed with seq_length=5: + ... {"bins": [ [0, 1], [2, 3], [4, 5], [6], [7] ]} + >>> # If packed with seq_length=8: + ... {"bins": [ [0, 1, 2], [3, 4, 5], [6], [7] ]} + """ + + # Global class variable to limit the number of sequences packed in a unit + # -1 disables this limit + max_sequences_per_bin = 4 # Useful for debugging and testing + + def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: + """Pack sequences using the Concatenative algorithm. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of bins, where each bin is a list of indices into the original + sequence_lengths list. 
+ """ + # Validate sequence lengths + self._validate_sequence_lengths(sequence_lengths) + + bins = [] # List of bins, each bin is a list of sequence indices + current_bin = [] # Current bin being filled + current_length = 0 # Current length of sequences in the bin + + for i, length in enumerate(sequence_lengths): + # Check if adding this sequence would exceed bin capacity or sequence limit + exceeds_capacity = current_length + length > self.bin_capacity + exceeds_sequence_limit = ( + self.max_sequences_per_bin != -1 + and len(current_bin) >= self.max_sequences_per_bin + ) + + # If adding this sequence would exceed constraints, start a new bin + if exceeds_capacity or exceeds_sequence_limit: + if current_bin: # Only add the bin if it's not empty + bins.append(current_bin) + current_bin = [i] + current_length = length + else: + # Add the sequence to the current bin + current_bin.append(i) + current_length += length + + # Add the last bin if it's not empty + if current_bin: + bins.append(current_bin) + + return bins + + +class FirstFitPacker(SequencePacker): + """Base class for First-Fit algorithms. + + First-Fit algorithms place each sequence into the first bin where it fits. + If no bin can fit the sequence, a new bin is created. + + This is an abstract base class that provides the common implementation for + First-Fit variants. Subclasses must implement the _prepare_sequences method + to determine the order in which sequences are processed. + """ + + def _prepare_sequences(self, sequence_lengths: List[int]) -> List[Tuple[int, int]]: + """Prepare sequences for packing. + + This method determines the order in which sequences are processed. + Subclasses must override this method. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of (length, index) pairs. 
+ """ + raise NotImplementedError("Subclasses must implement _prepare_sequences") + + def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: + """Pack sequences using the First-Fit algorithm. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of bins, where each bin is a list of indices into the original + sequence_lengths list. + """ + # Prepare sequences for packing (order determined by subclass) + indexed_lengths = self._prepare_sequences(sequence_lengths) + + bins = [] # List of bins, each bin is a list of sequence indices + bin_remaining = [] # Remaining capacity for each bin + + for length, idx in indexed_lengths: + # If the sequence is larger than the bin capacity, it cannot be packed + if length > self.bin_capacity: + raise ValueError( + f"Sequence length {length} exceeds bin capacity {self.bin_capacity}" + ) + + # Try to find a bin where the sequence fits + bin_found = False + for i, remaining in enumerate(bin_remaining): + if remaining >= length: + # Add the sequence to this bin + bins[i].append(idx) + bin_remaining[i] -= length + bin_found = True + break + + # If no suitable bin was found, create a new one + if not bin_found: + bins.append([idx]) + bin_remaining.append(self.bin_capacity - length) + + return bins + + +class FirstFitDecreasingPacker(FirstFitPacker): + """First-Fit Decreasing (FFD) algorithm for sequence packing. + + This algorithm sorts sequences by length in descending order and then + places each sequence into the first bin where it fits. + + Time complexity: O(n log n) for sorting + O(n * m) for packing, + where n is the number of sequences and m is the number of bins. + """ + + def _prepare_sequences(self, sequence_lengths: List[int]) -> List[Tuple[int, int]]: + """Prepare sequences for packing by sorting them in descending order. + + Args: + sequence_lengths: A list of sequence lengths to pack. 
+ + Returns: + A list of (length, index) pairs sorted by length in descending order. + """ + # Create a list of (length, index) pairs + indexed_lengths = [(length, i) for i, length in enumerate(sequence_lengths)] + + # Sort by length in descending order + indexed_lengths.sort(reverse=True) + + return indexed_lengths + + +class FirstFitShufflePacker(FirstFitPacker): + """First-Fit Shuffle algorithm for sequence packing. + + This algorithm randomly shuffles the sequences and then places each + sequence into the first bin where it fits. + + Time complexity: O(n * m) for packing, where n is the number of sequences + and m is the number of bins. + """ + + def _prepare_sequences(self, sequence_lengths: List[int]) -> List[Tuple[int, int]]: + """Prepare sequences for packing by randomly shuffling them. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of (length, index) pairs in random order. + """ + # Create a list of (length, index) pairs + indexed_lengths = [(length, i) for i, length in enumerate(sequence_lengths)] + + # Shuffle the sequences + random.shuffle(indexed_lengths) + + return indexed_lengths + + +class ModifiedFirstFitDecreasingPacker(SequencePacker): + """Modified First-Fit Decreasing (MFFD) algorithm for sequence packing. + + This algorithm implements the Johnson & Garey (1985) Modified First-Fit-Decreasing + heuristic. It classifies items into four categories (large, medium, small, tiny) + and uses a sophisticated 5-phase packing strategy to achieve better bin utilization + than standard First-Fit Decreasing. + + The algorithm phases: + 1. Classify items by size relative to bin capacity + 2. Create one bin per large item + 3. Add medium items to large bins (forward pass) + 4. Add pairs of small items to bins with medium items (backward pass) + 5. Greedily fit remaining items + 6. 
Apply FFD to any leftovers + + Time complexity: O(n log n) for sorting + O(n * m) for packing, + where n is the number of sequences and m is the number of bins. + """ + + def _classify_items( + self, items: List[Tuple[int, int]] + ) -> Tuple[ + List[Tuple[int, int]], + List[Tuple[int, int]], + List[Tuple[int, int]], + List[Tuple[int, int]], + ]: + """Split items into large / medium / small / tiny classes. + + Follows the classification used by Johnson & Garey: + large : (C/2, C] + medium : (C/3, C/2] + small : (C/6, C/3] + tiny : (0 , C/6] + + Args: + items: List of (index, size) tuples + + Returns: + Tuple of four lists (large, medium, small, tiny) without additional sorting. + """ + large, medium, small, tiny = [], [], [], [] + for idx, size in items: + if size > self.bin_capacity / 2: + large.append((idx, size)) + elif size > self.bin_capacity / 3: + medium.append((idx, size)) + elif size > self.bin_capacity / 6: + small.append((idx, size)) + else: + tiny.append((idx, size)) + return large, medium, small, tiny + + def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: + """Pack sequences using the Modified First-Fit Decreasing algorithm. + + Args: + sequence_lengths: A list of sequence lengths to pack. + + Returns: + A list of bins, where each bin is a list of indices into the original + sequence_lengths list. 
+ """ + # Validate inputs + if self.bin_capacity <= 0: + raise ValueError("bin_capacity must be positive") + if any(l <= 0 for l in sequence_lengths): + raise ValueError("sequence lengths must be positive") + + # Validate sequence lengths don't exceed capacity + self._validate_sequence_lengths(sequence_lengths) + + items: List[Tuple[int, int]] = [(i, l) for i, l in enumerate(sequence_lengths)] + + # Phase-0: classify + large, medium, small, tiny = self._classify_items(items) + + # Sort according to the rules of MFFD + large.sort(key=lambda x: x[1], reverse=True) # descending size + medium.sort(key=lambda x: x[1], reverse=True) + small.sort(key=lambda x: x[1]) # ascending size + tiny.sort(key=lambda x: x[1]) + + # Phase-1: start one bin per large item + bins: List[List[Tuple[int, int]]] = [[item] for item in large] + + # Phase-2: try to add one medium item to each large bin (forward pass) + for b in bins: + remaining = self.bin_capacity - sum(size for _, size in b) + for i, (idx, size) in enumerate(medium): + if size <= remaining: + b.append(medium.pop(i)) + break + + # Phase-3: backward pass – fill with two small items where possible + for b in reversed(bins): + has_medium = any( + self.bin_capacity / 3 < size <= self.bin_capacity / 2 for _, size in b + ) + if has_medium or len(small) < 2: + continue + remaining = self.bin_capacity - sum(size for _, size in b) + if small[0][1] + small[1][1] > remaining: + continue + first_small = small.pop(0) + # pick the *largest* small that fits with first_small (so iterate from end) + second_idx = None + for j in range(len(small) - 1, -1, -1): + if small[j][1] <= remaining - first_small[1]: + second_idx = j + break + if second_idx is not None: + second_small = small.pop(second_idx) + b.extend([first_small, second_small]) + + # Phase-4: forward greedy fit of remaining items + remaining_items = sorted( + medium + small + tiny, key=lambda x: x[1], reverse=True + ) + for b in bins: + while remaining_items: + rem = self.bin_capacity 
- sum(size for _, size in b) + # if even the smallest remaining doesn't fit we break + if rem < remaining_items[-1][1]: + break + + # pick the first (largest) that fits + chosen_idx = None + for i, (_, size) in enumerate(remaining_items): + if size <= rem: + chosen_idx = i + break + if chosen_idx is None: + break + b.append(remaining_items.pop(chosen_idx)) + + # Phase-5: FFD on leftovers + leftovers = remaining_items # renamed for clarity + ffd_bins: List[List[Tuple[int, int]]] = [] + for idx, size in sorted(leftovers, key=lambda x: x[1], reverse=True): + placed = False + for bin_ffd in ffd_bins: + if size <= self.bin_capacity - sum(s for _, s in bin_ffd): + bin_ffd.append((idx, size)) + placed = True + break + if not placed: + ffd_bins.append([(idx, size)]) + bins.extend(ffd_bins) + + # Convert to list of index lists (discard sizes) + return [[idx for idx, _ in b] for b in bins] + + +def get_packer( + algorithm: Union[PackingAlgorithm, str], + bin_capacity: int, + collect_metrics: bool = False, +) -> SequencePacker: + """Factory function to get a sequence packer based on the algorithm. + + Args: + algorithm: The packing algorithm to use. Can be either a PackingAlgorithm enum value + or a string (case-insensitive) matching one of the enum names. + bin_capacity: The maximum capacity of each bin. + collect_metrics: Whether to collect metrics across multiple packing operations. + + Returns: + A SequencePacker instance for the specified algorithm. + + Raises: + ValueError: If the algorithm is not recognized. 
+ """ + packers: Dict[PackingAlgorithm, Type[SequencePacker]] = { + PackingAlgorithm.CONCATENATIVE: ConcatenativePacker, + PackingAlgorithm.FIRST_FIT_DECREASING: FirstFitDecreasingPacker, + PackingAlgorithm.FIRST_FIT_SHUFFLE: FirstFitShufflePacker, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING: ModifiedFirstFitDecreasingPacker, + } + + # Convert string to enum if needed + if isinstance(algorithm, str): + try: + algorithm = PackingAlgorithm[algorithm.upper()] + except KeyError: + available_algorithms = ", ".join([alg.name for alg in PackingAlgorithm]) + raise ValueError( + f"Unknown packing algorithm: {algorithm}. " + f"Available algorithms: {available_algorithms}" + ) + + if algorithm not in packers: + available_algorithms = ", ".join([alg.name for alg in PackingAlgorithm]) + raise ValueError( + f"Unknown packing algorithm: {algorithm}. " + f"Available algorithms: {available_algorithms}" + ) + + return packers[algorithm](bin_capacity, collect_metrics=collect_metrics) diff --git a/nemo_rl/data/packing/metrics.py b/nemo_rl/data/packing/metrics.py new file mode 100644 index 0000000000..f4c8da0aae --- /dev/null +++ b/nemo_rl/data/packing/metrics.py @@ -0,0 +1,249 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Metrics for evaluating sequence packing algorithms.""" + +import math +import statistics +from typing import Dict, List, Optional + + +class PackingMetrics: + """Class for tracking and computing metrics for sequence packing algorithms. + + This class provides methods to calculate various metrics that evaluate the + efficiency and effectiveness of sequence packing algorithms, such as bin + utilization, waste, and imbalance. + """ + + def __init__(self): + """Initialize the metrics tracker.""" + self.reset() + + def reset(self) -> None: + """Reset all metrics.""" + # Counters for aggregated metrics + self.total_sequences = 0 + self.total_bins = 0 + self.total_sequence_length = 0 + self.total_bin_capacity = 0 + self.total_waste = 0 + self.bin_utilizations = [] + self.bin_counts = [] + self.packing_times = [] + + # Tracking best and worst cases + self.min_utilization = 1.0 + self.max_utilization = 0.0 + self.min_waste_ratio = 1.0 + self.max_waste_ratio = 0.0 + + def update( + self, + sequence_lengths: List[int], + bins: List[List[int]], + bin_capacity: int, + packing_time: Optional[float] = None, + ) -> Dict[str, float]: + """Update metrics with a new packing solution. 
+ + Args: + sequence_lengths: List of sequence lengths + bins: List of bins, where each bin is a list of indices + bin_capacity: Maximum capacity of each bin + packing_time: Optional time taken to compute the packing solution + + Returns: + Dictionary of metrics for this packing solution + """ + # Calculate metrics for this solution + stats = self.calculate_stats_only(sequence_lengths, bins, bin_capacity) + + # Update counters + self.total_sequences += len(sequence_lengths) + self.total_bins += len(bins) + self.total_sequence_length += sum(sequence_lengths) + self.total_bin_capacity += len(bins) * bin_capacity + self.total_waste += stats["total_waste"] + self.bin_utilizations.append(stats["average_utilization"]) + self.bin_counts.append(len(bins)) + + if packing_time is not None: + self.packing_times.append(packing_time) + + # Update min/max values + self.min_utilization = min(self.min_utilization, stats["average_utilization"]) + self.max_utilization = max(self.max_utilization, stats["average_utilization"]) + self.min_waste_ratio = min(self.min_waste_ratio, stats["waste_ratio"]) + self.max_waste_ratio = max(self.max_waste_ratio, stats["waste_ratio"]) + + return stats + + def calculate_stats_only( + self, sequence_lengths: List[int], bins: List[List[int]], bin_capacity: int + ) -> Dict[str, float]: + """Calculate metrics for a packing solution without updating the tracker. 
+ + Args: + sequence_lengths: List of sequence lengths + bins: List of bins, where each bin is a list of indices + bin_capacity: Maximum capacity of each bin + + Returns: + Dictionary of metrics for this packing solution + """ + if not bins: + return { + "num_sequences": 0, + "num_bins": 0, + "total_sequence_length": 0, + "total_bin_capacity": 0, + "total_waste": 0, + "average_utilization": 0.0, + "waste_ratio": 0.0, + "bin_balance": 0.0, + "theoretical_min_bins": 0, + "bin_efficiency": 0.0, + } + + # Calculate bin loads + bin_loads = [ + sum(sequence_lengths[idx] for idx in bin_indices) for bin_indices in bins + ] + + # Calculate basic metrics + num_sequences = len(sequence_lengths) + num_bins = len(bins) + total_sequence_length = sum(sequence_lengths) + total_bin_capacity = num_bins * bin_capacity + total_waste = total_bin_capacity - total_sequence_length + + # Calculate utilization metrics + bin_utilizations = [load / bin_capacity for load in bin_loads] + average_utilization = total_sequence_length / total_bin_capacity + waste_ratio = total_waste / total_bin_capacity + + # Calculate bin balance metrics (standard deviation of utilization) + if num_bins > 1: + bin_balance = 1.0 - statistics.stdev(bin_utilizations) / average_utilization + else: + bin_balance = 1.0 + + # Calculate theoretical minimum number of bins + theoretical_min_bins = math.ceil(total_sequence_length / bin_capacity) + + # Calculate bin efficiency (ratio of theoretical min bins to actual bins) + bin_efficiency = theoretical_min_bins / num_bins if num_bins > 0 else 0.0 + + return { + "num_sequences": num_sequences, + "num_bins": num_bins, + "total_sequence_length": total_sequence_length, + "total_bin_capacity": total_bin_capacity, + "total_waste": total_waste, + "average_utilization": average_utilization, + "waste_ratio": waste_ratio, + "bin_balance": bin_balance, + "theoretical_min_bins": theoretical_min_bins, + "bin_efficiency": bin_efficiency, + } + + def get_aggregated_stats(self) -> Dict[str, 
float]: + """Get aggregated metrics across all packing operations. + + Returns: + Dictionary of aggregated metrics + """ + if not self.bin_utilizations: + return {} + + # Calculate aggregated metrics + avg_utilization = ( + self.total_sequence_length / self.total_bin_capacity + if self.total_bin_capacity > 0 + else 0.0 + ) + avg_waste_ratio = ( + self.total_waste / self.total_bin_capacity + if self.total_bin_capacity > 0 + else 0.0 + ) + avg_bin_count = ( + sum(self.bin_counts) / len(self.bin_counts) if self.bin_counts else 0.0 + ) + + # Calculate theoretical minimum number of bins + theoretical_min_bins = ( + math.ceil( + self.total_sequence_length / (self.total_bin_capacity / self.total_bins) + ) + if self.total_bins > 0 + else 0 + ) + + # Calculate bin efficiency (ratio of theoretical min bins to actual bins) + bin_efficiency = ( + theoretical_min_bins / self.total_bins if self.total_bins > 0 else 0.0 + ) + + # Calculate average packing time if available + avg_packing_time = ( + sum(self.packing_times) / len(self.packing_times) + if self.packing_times + else None + ) + + stats = { + "total_sequences": self.total_sequences, + "total_bins": self.total_bins, + "average_utilization": avg_utilization, + "min_utilization": self.min_utilization, + "max_utilization": self.max_utilization, + "average_waste_ratio": avg_waste_ratio, + "min_waste_ratio": self.min_waste_ratio, + "max_waste_ratio": self.max_waste_ratio, + "average_bin_count": avg_bin_count, + "bin_efficiency": bin_efficiency, + } + + if avg_packing_time is not None: + stats["average_packing_time"] = avg_packing_time + + return stats + + def print_aggregated_stats(self) -> None: + """Print the aggregated metrics in a formatted way.""" + stats = self.get_aggregated_stats() + + if not stats: + print("No metrics collected yet.") + return + + print("\n=== Packing Metrics Summary ===") + print(f"Total sequences packed: {stats['total_sequences']}") + print(f"Total bins used: {stats['total_bins']}") + print( + 
f"Average bin utilization: {stats['average_utilization']:.4f} (min: {stats['min_utilization']:.4f}, max: {stats['max_utilization']:.4f})" + ) + print( + f"Average waste ratio: {stats['average_waste_ratio']:.4f} (min: {stats['min_waste_ratio']:.4f}, max: {stats['max_waste_ratio']:.4f})" + ) + print( + f"Bin efficiency (theoretical min bins / actual bins): {stats['bin_efficiency']:.4f}" + ) + + if "average_packing_time" in stats: + print(f"Average packing time: {stats['average_packing_time']:.6f} seconds") + + print("===============================\n") diff --git a/nemo_rl/distributed/batched_data_dict.py b/nemo_rl/distributed/batched_data_dict.py index dc30d68364..969738d203 100644 --- a/nemo_rl/distributed/batched_data_dict.py +++ b/nemo_rl/distributed/batched_data_dict.py @@ -28,6 +28,7 @@ import torch from typing_extensions import Self +from nemo_rl.data.packing import get_packer from nemo_rl.distributed.collectives import ( gather_jagged_object_lists, rebalance_nd_tensor, @@ -36,6 +37,21 @@ DictT = TypeVar("DictT", bound=Mapping[str, Any]) +class SequencePackingArgs(TypedDict): + """Configuration settings for sequence packing. + + Pass this to 'shard_by_batch_size()' to preprocess batches for sequence packing. + """ + + max_tokens_per_microbatch: int + input_key: str + input_lengths_key: str + algorithm: str + sequence_length_pad_multiple: ( + int # pad each sequence to a multiple of this value (for CP/TP alignment) + ) + + class DynamicBatchingArgs(TypedDict): """Configuration settings for dynamic batching. 
@@ -58,6 +74,7 @@ def __init__(self, *args, **kwargs): self.micro_batch_indices = None self.micro_batch_lengths = None + self.elem_counts_per_gb = None @classmethod def from_batches( @@ -204,6 +221,7 @@ def shard_by_batch_size( batch_size: Optional[int] = None, allow_uneven_shards: bool = False, dynamic_batching_args: Optional[DynamicBatchingArgs] = None, + sequence_packing_args: Optional[SequencePackingArgs] = None, ) -> list["SlicedDataDict"] | tuple[list["SlicedDataDict"], list[int]]: """Shards a batch by first dividing it into chunks of size batch_size, then further dividing each chunk into shards equal parts. Finally aggregates the sub-shards by their position. @@ -219,7 +237,7 @@ def shard_by_batch_size( allow_uneven_shards (bool): Whether to allow shards to be unevenly sized. If True, the last shard may be smaller than the others. dynamic_batching_args (dict): If passed, preprocess batch for dynamic batching. This - dict requires two keys: + dict requires four keys: 1. max_tokens_per_microbatch (int): the maximum number of tokens in a microbatch 2. sequence_length_round (int): round each all @@ -229,6 +247,21 @@ def shard_by_batch_size( 4. input_lengths_key (str): the key in the batch which holds the sequence length per value. The sequence dim index is assumed to be 1. + Cannot be passed with sequence_packing_args. + + sequence_packing_args (dict): If passed, preprocess batch for sequence packing. This + dict requires five keys: + 1. max_tokens_per_microbatch (int): the maximum + number of tokens in a microbatch + 2. input_key (str): the key in the batch + which holds input ids. + 3. input_lengths_key (str): the key in the batch + which holds the sequence length per value. + The sequence dim index is assumed to be 1. + 4. algorithm (str): the algorithm to use for sequence packing. + 5. sequence_length_pad_multiple (int): the multiple to pad each sequence to. + With CP enabled, this should be set to a multiple of 2*CP and SP. 
+ Cannot be passed with dynamic_batching_args. Returns: list[BatchedDataDict]: A list of BatchedDataDicts, length equal to shards. @@ -268,6 +301,9 @@ def shard_by_batch_size( assert batch_size is None, ( "batch_size must be None if allow_uneven_shards is True" ) + assert dynamic_batching_args is None or sequence_packing_args is None, ( + "dynamic_batching_args and sequence_packing_args cannot be passed together" + ) # Get the total batch size batch_sizes = set() @@ -336,6 +372,112 @@ def shard_by_batch_size( else: sorted_v = [v[i] for i in batch_sorted_indices] data[k] = sorted_v + + elif sequence_packing_args is not None: + bin_packer = get_packer( + algorithm=sequence_packing_args["algorithm"], + bin_capacity=sequence_packing_args["max_tokens_per_microbatch"], + collect_metrics=False, # TODO(ahmadki): make configurable + ) + + input_lengths_key = sequence_packing_args["input_lengths_key"] + input_lens = self.data[input_lengths_key] + if not isinstance(input_lens, torch.Tensor): + input_lens = torch.tensor(input_lens) + + pad_multiple = sequence_packing_args["sequence_length_pad_multiple"] + + def _get_padded_seqlen(seqlen: int) -> int: + return (seqlen + pad_multiple - 1) // pad_multiple * pad_multiple + + # Store bin assignments for each chunk to reuse later + all_chunk_bin_assignments = [] + + # Process each chunk separately to respect chunk boundaries + for chunk_idx in range(num_chunks): + chunk_start = chunk_idx * batch_size + chunk_end = (chunk_idx + 1) * batch_size + + # Get sequence lengths for this chunk + chunk_seqlens = input_lens[chunk_start:chunk_end] + chunk_padded_seqlens_list = [ + _get_padded_seqlen(seq_len.item()) for seq_len in chunk_seqlens + ] + + # Pack sequences in this chunk into bins + chunk_bin_assignments = bin_packer.pack( + sequence_lengths=chunk_padded_seqlens_list, + ) + all_chunk_bin_assignments.append(chunk_bin_assignments) + + # create shards with the packed bins + sharded_data: list[list[dict]] = [[] for _ in range(shards)] + 
sharded_micro_indices: list = [[] for _ in range(shards)] + sharded_micro_lengths: list = [[] for _ in range(shards)] + sharded_elem_counts_per_gb: list = [[] for _ in range(shards)] + global_indices_per_shard: list[list[int]] = [[] for _ in range(shards)] + for chunk_idx in range(num_chunks): + chunk_sharded_micro_indices: list[list[list[int]]] = [ + [] for _ in range(shards) + ] + chunk_sharded_micro_lengths: list[list[int]] = [ + [] for _ in range(shards) + ] + + num_bins = len(all_chunk_bin_assignments[chunk_idx]) + chunk_start = chunk_idx * batch_size + for bin_idx in range(num_bins): + shard_idx = bin_idx % shards + bin_indices = all_chunk_bin_assignments[chunk_idx][bin_idx] + global_bin_indices = [i + chunk_start for i in bin_indices] + sharded_data[shard_idx].append( + self.select_indices(global_bin_indices) + ) + global_indices_per_shard[shard_idx].extend(global_bin_indices) + bin_seqlen = sum( + [ + _get_padded_seqlen(input_lens[i].item()) + for i in global_bin_indices + ] + ) + + if chunk_sharded_micro_indices[shard_idx] == []: + chunk_sharded_micro_indices[shard_idx].append( + [0, len(bin_indices)] + ) + else: + prev_bin_end = chunk_sharded_micro_indices[shard_idx][-1][1] + chunk_sharded_micro_indices[shard_idx].append( + [prev_bin_end, prev_bin_end + len(bin_indices)] + ) + chunk_sharded_micro_lengths[shard_idx].append(bin_seqlen) + + for shard_idx in range(shards): + sharded_micro_indices[shard_idx].append( + chunk_sharded_micro_indices[shard_idx] + ) + sharded_micro_lengths[shard_idx].append( + chunk_sharded_micro_lengths[shard_idx] + ) + sharded_elem_counts_per_gb[shard_idx].append( + chunk_sharded_micro_indices[shard_idx][-1][1] + ) + + # flatten global_indices_per_shard + batch_sorted_indices = [] + for shard_idx in range(shards): + batch_sorted_indices.extend(global_indices_per_shard[shard_idx]) + + aggregated_shards = [] + for shard_idx in range(shards): + shard = SlicedDataDict.from_batches(sharded_data[shard_idx]) + shard.micro_batch_indices = 
sharded_micro_indices[shard_idx] + shard.micro_batch_lengths = sharded_micro_lengths[shard_idx] + shard.elem_counts_per_gb = sharded_elem_counts_per_gb[shard_idx] + aggregated_shards.append(shard) + + return aggregated_shards, batch_sorted_indices + else: data = self.data @@ -457,7 +599,7 @@ def shard_by_batch_size( return aggregated_shards - def get_batch(self, batch_idx, batch_size) -> "SlicedDataDict": + def get_batch(self, batch_idx, batch_size=None) -> "SlicedDataDict": """Slices a subbatch from the batch. Args: @@ -467,6 +609,21 @@ def get_batch(self, batch_idx, batch_size) -> "SlicedDataDict": Returns: BatchedDataDict: A new BatchedDataDict containing the sliced data """ + if self.elem_counts_per_gb is not None: + assert self.micro_batch_indices is not None, ( + "micro_batch_indices must be provided if sequence_packing is True" + ) + elem_count = self.elem_counts_per_gb[batch_idx] + cum_elem_count = [0] + for i in range(len(self.elem_counts_per_gb)): + cum_elem_count.append(cum_elem_count[i] + self.elem_counts_per_gb[i]) + + batch = self.slice(cum_elem_count[batch_idx], cum_elem_count[batch_idx + 1]) + batch.micro_batch_indices = [self.micro_batch_indices[batch_idx]] + batch.micro_batch_lengths = [self.micro_batch_lengths[batch_idx]] # type: ignore # This exists if idxs do + batch.elem_counts_per_gb = [elem_count] + return batch + start = batch_size * batch_idx end = batch_size * (batch_idx + 1) batch = self.slice(start, end) @@ -488,6 +645,10 @@ def slice(self, start: int, end: int) -> "SlicedDataDict": """ sliced_batch = SlicedDataDict() for k in self.data: + if isinstance(self.data[k], torch.Tensor): + assert end <= self.data[k].shape[0], ( + f"end: {end} is greater than the shape of the tensor: {self.data[k].shape[0]} for key: {k}" + ) sliced_batch[k] = self.data[k][start:end] return sliced_batch @@ -520,7 +681,7 @@ def make_microbatch_iterator_with_dynamic_shapes( self, sequence_dim: int = 1, ) -> Iterator["SlicedDataDict"]: - """Makes an interator that 
yields microbatchs of dynamic batch and sequence sizes. + """Makes an iterator that yields microbatchs of dynamic batch and sequence sizes. Args: sequence_dim: the index of the sequence dim for all tensors in the data dict @@ -542,9 +703,29 @@ def make_microbatch_iterator_with_dynamic_shapes( yield mb def get_microbatch_iterator_dynamic_shapes_len(self) -> int: - """Get the length of the microbatch iterator with dynamic shapes.""" + """Get the length of the microbatch iterator for dynamic shapes.""" return len(self.micro_batch_indices[0]) + def make_microbatch_iterator_for_packable_sequences( + self, + ) -> Iterator["SlicedDataDict"]: + """Make an iterator over the batch that yields microbatches that can be packed into a given max_tokens_per_microbatch.""" + assert ( + self.micro_batch_indices is not None + and len(self.micro_batch_indices) == 1 + and self.micro_batch_lengths is not None + ) + + for seqlen, (start_idx, end_idx) in zip( + self.micro_batch_lengths[0], self.micro_batch_indices[0] + ): + mb = self.slice(start_idx, end_idx) + yield mb + + def get_microbatch_iterator_for_packable_sequences_len(self) -> tuple[int, int]: + """Get the length of the microbatch iterator for sequence packing and the max packed seqlen.""" + return len(self.micro_batch_indices[0]), max(self.micro_batch_lengths[0]) + def make_microbatch_iterator( self, microbatch_size: int ) -> Iterator["SlicedDataDict"]: diff --git a/nemo_rl/distributed/model_utils.py b/nemo_rl/distributed/model_utils.py index 31ac71cc23..606fd8464b 100644 --- a/nemo_rl/distributed/model_utils.py +++ b/nemo_rl/distributed/model_utils.py @@ -120,21 +120,21 @@ def backward( return grad_input, None, None, None, None, None, None -def from_parallel_logits_to_logprobs( +def dtensor_from_parallel_logits_to_logprobs( vocab_parallel_logits: torch.Tensor, - target: torch.Tensor | DTensor, + target: DTensor | torch.Tensor, vocab_start_index: int, vocab_end_index: int, tp_group: torch.distributed.ProcessGroup, 
inference_only: bool = False, seq_index: Optional[torch.Tensor] = None, ) -> torch.Tensor: - """Get log probabilities from TP sharded vocab logits. + """Get log probabilities from TP+CP sharded vocab logits. Args: - vocab_parallel_logits (torch.Tensor): Logits tensor with shape [batch_size, seq_len, vocab_size//TP] - where TP is the tensor parallel size. - target (torch.Tensor): Target token indices with shape [batch_size, seq_len]. + vocab_parallel_logits (orch.Tensor): Logits distributed across tensor parallel workers, + with shape [batch_size, seq_len, vocab_size/tp_size]. + target (DTensor): Target token indices with shape [batch_size, seq_len]. NOTE: Must be the unmodified targets as this function will shift them internally. vocab_start_index (int): Starting vocabulary index for this worker's partition. vocab_end_index (int): Ending vocabulary index for this worker's partition. @@ -146,8 +146,6 @@ def from_parallel_logits_to_logprobs( Returns: torch.Tensor: Log probabilities tensor with shape [batch_size, seq_len-1]. The sequence dimension is reduced by 1 due to the target shifting. - - Taken from: https://github.com/NVIDIA/NeMo-Aligner/blob/9faab404f21994a7eb1d6ed5890b76152b941636/nemo_aligner/utils/distributed.py#L354 """ cp_size = 1 @@ -188,3 +186,286 @@ def from_parallel_logits_to_logprobs( assert probs.shape == target_shape return probs[:, :-1] + + +def from_parallel_logits_to_logprobs( + vocab_parallel_logits: torch.Tensor, + target: torch.Tensor, + vocab_start_index: int, + vocab_end_index: int, + tp_group: torch.distributed.ProcessGroup, + inference_only: bool = False, + cp_group: Optional[torch.distributed.ProcessGroup] = None, +) -> torch.Tensor: + """Get log probabilities from TP+CP sharded vocab logits. + + Args: + vocab_parallel_logits (torch.Tensor): Logits tensor with shape [batch_size, seq_len // CP, vocab_size // TP] + where TP is the tensor parallel size. + target (torch.Tensor): Target token indices with shape [batch_size, seq_len]. 
+ NOTE: Must be the unmodified targets as this function will shift them internally. + vocab_start_index (int): Starting vocabulary index for this worker's partition. + vocab_end_index (int): Ending vocabulary index for this worker's partition. + tp_group (torch.distributed.ProcessGroup): Process group for distributed communication. + inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. + cp_group (torch.distributed.ProcessGroup, optional): Context parallelism process group. Defaults to None. + + Returns: + torch.Tensor: Log probabilities tensor with shape [batch_size, seq_len-1]. + The sequence dimension is reduced by 1 due to the target shifting. + + Taken from: https://github.com/NVIDIA/NeMo-Aligner/blob/9faab404f21994a7eb1d6ed5890b76152b941636/nemo_aligner/utils/distributed.py#L354 + """ + target = target.roll(shifts=-1, dims=-1) + cp_size = 1 if cp_group is None else torch.distributed.get_world_size(cp_group) + pad_len = 0 + # if cp_size > 1: + # Pad the targets to local size * cp_size + pad_len = vocab_parallel_logits.shape[1] * cp_size - target.shape[1] + if pad_len > 0: + target = torch.nn.functional.pad(target, (0, pad_len), value=0) + + # Shard the targets by context parallelism + cp_rank = torch.distributed.get_rank(cp_group) + target = _get_tokens_on_this_cp_rank(target, cp_rank, cp_size, seq_dim=1) + + probs: torch.Tensor = DistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + target, + vocab_start_index, + vocab_end_index, + tp_group, + inference_only, + ).contiguous() + + if cp_size > 1: + # we need to gather the logits by context parallelism + probs = allgather_cp_sharded_tensor( + probs, cp_group, seq_dim=1 + ) # , unpadded_seqlen=target.shape[1]) + + if pad_len > 0: + probs = probs[:, :-pad_len] + + return probs[:, :-1] + + +def from_parallel_logits_to_logprobs_packed_sequences( + vocab_parallel_logits: torch.Tensor, + target: torch.Tensor, + cu_seqlens_padded: torch.Tensor, + 
unpacked_seqlen: int, + vocab_start_index: int, + vocab_end_index: int, + group: torch.distributed.ProcessGroup, + inference_only: bool = False, + cp_group: Optional[torch.distributed.ProcessGroup] = None, +) -> torch.Tensor: + """Get log probabilities from TP sharded vocab logits for packed sequences. + + Args: + vocab_parallel_logits (torch.Tensor): Packed logits tensor with shape [1, T // CP, vocab_size//TP] + where T is the total number of tokens across all packed sequences. + target (torch.Tensor): Packed target token indices with shape [1, T]. + NOTE: Must be the unmodified targets as this function will shift them internally. + cu_seqlens (torch.Tensor): Cumulative sequence lengths tensor with shape [batch_size + 1]. + cu_seqlens[i] indicates the start position of sequence i in the packed format. + unpacked_seqlen (int): The length of the unpacked sequence tensor. + vocab_start_index (int): Starting vocabulary index for this worker's partition. + vocab_end_index (int): Ending vocabulary index for this worker's partition. + group (torch.distributed.ProcessGroup): Process group for distributed communication. + inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. + cp_group (torch.distributed.ProcessGroup, optional): Context parallelism process group. Defaults to None. + + Returns: + torch.Tensor: Unpacked log probabilities tensor with shape [batch_size, unpacked_seqlen-1]. + The total length is reduced by batch_size due to target shifting (one token per sequence). 
+ """ + # Remove batch dimension to work with [T, vocab_size] and [T] + vocab_parallel_logits = vocab_parallel_logits.squeeze(0) + target = target.squeeze(0) + + batch_size = cu_seqlens_padded.shape[0] - 1 + cp_size = 1 if cp_group is None else torch.distributed.get_world_size(cp_group) + cp_rank = 0 if cp_group is None else torch.distributed.get_rank(cp_group) + + # Roll each sequence individually + rolled_targets = torch.zeros( + target.shape[0] // cp_size, dtype=target.dtype, device=target.device + ) + for i in range(batch_size): + start_idx = cu_seqlens_padded[i].item() + end_idx = cu_seqlens_padded[i + 1].item() + + # Get the sequence targets and roll by -1 + seq_targets = target[start_idx:end_idx] + rolled_seq_targets = seq_targets.roll(shifts=-1, dims=0) + rolled_targets[start_idx // cp_size : end_idx // cp_size] = ( + _get_tokens_on_this_cp_rank(rolled_seq_targets, cp_rank, cp_size, seq_dim=0) + ) + + # Add batch dimension back for DistributedLogprob + rolled_targets = rolled_targets.unsqueeze(0) + vocab_parallel_logits = vocab_parallel_logits.unsqueeze(0) + + # Apply distributed log probability computation + probs: torch.Tensor = DistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + rolled_targets, + vocab_start_index, + vocab_end_index, + group, + inference_only, + ).contiguous() + + # Remove batch dimension for filtering + probs = probs.squeeze(0) + + # Ensure probs is 1D after squeezing + if probs.dim() != 1: + raise ValueError( + f"Expected probs to be 1D after squeezing, but got shape {probs.shape}. 
" + f"Original shape before squeeze: {probs.unsqueeze(0).shape}" + ) + + if cp_size > 1: + # per-sequence cp_allgather + final_probs = torch.zeros(probs.shape[0] * cp_size, device=probs.device) + for i in range(batch_size): + start_idx = cu_seqlens_padded[i].item() + end_idx = cu_seqlens_padded[i + 1].item() + final_probs[start_idx:end_idx] = allgather_cp_sharded_tensor( + probs[start_idx // cp_size : end_idx // cp_size], cp_group, seq_dim=0 + ) + probs = final_probs + + out_logprobs = torch.zeros( + (batch_size, unpacked_seqlen - 1), dtype=probs.dtype, device=probs.device + ) + # Filter out the last token of each sequence + for i in range(batch_size): + start_idx = cu_seqlens_padded[i].item() + end_idx = cu_seqlens_padded[i + 1].item() + + # Exclude the last position (which has the rolled target from position 0) + if end_idx - start_idx > 0: + seq_probs = probs[start_idx : end_idx - 1] + # Ensure seq_probs is 1D + if seq_probs.dim() > 1: + seq_probs = seq_probs.squeeze() + + # Ensure we don't exceed the unpacked sequence length + seq_len = min(seq_probs.shape[0], unpacked_seqlen - 1) + if seq_len > 0: + out_logprobs[i, :seq_len] = seq_probs[:seq_len] + + return out_logprobs + + +def _get_tokens_on_this_cp_rank( + input_ids: torch.Tensor, + cp_rank: int, + cp_size: int, + seq_dim: int = 1, +) -> torch.Tensor: + """Get tokens on this context parallelism rank. + + Assumes that input_ids are already padded to a multiple of cp_size * 2 or cp_size == 1. 
+ + Args: + input_ids: Input token IDs [seq_length, ] + cp_rank: Context parallelism rank + cp_size: Context parallelism size + + Returns: + Tokens on this context parallelism rank [1, seq_length // cp_size] + """ + if cp_size == 1: + return input_ids + + # load balance for causal attention + shard_size = input_ids.shape[seq_dim] // (cp_size * 2) + shard_inds = (cp_rank, (cp_size * 2) - cp_rank - 1) + + # Create slices for each dimension + slices = [slice(None)] * input_ids.dim() + ids_chunks = [] + + for ind in shard_inds: + slices[seq_dim] = slice(ind * shard_size, (ind + 1) * shard_size) + ids_chunks.append(input_ids[slices]) + + ids = torch.cat(ids_chunks, dim=seq_dim) + return ids + + +def allgather_cp_sharded_tensor( + tensor, cp_group, seq_dim=1 +): # , unpadded_seqlen=None): + return AllGatherCPTensor.apply(tensor, cp_group, seq_dim) # , unpadded_seqlen) + + +class AllGatherCPTensor(torch.autograd.Function): + def forward( + ctx, tensor, cp_group: torch.distributed.ProcessGroup, seq_dim=1 + ): # , unpadded_seqlen: Optional[int] = None): + cp_size = torch.distributed.get_world_size(cp_group) + cp_rank_chunks = [] + for _ in range(cp_size): + cp_rank_chunks.append(torch.empty_like(tensor)) + + torch.distributed.all_gather( + tensor_list=cp_rank_chunks, tensor=tensor, group=cp_group + ) + + # undo the CP load balancing chunking + tensor_chunks = [] + for logit_chunk in cp_rank_chunks: + tensor_chunks.extend(torch.chunk(logit_chunk, chunks=2, dim=seq_dim)) + + chunk_indices = [] + for cp_rank in range(cp_size): + chunk_indices.append(cp_rank) + chunk_indices.append(2 * cp_size - cp_rank - 1) + + chunks_and_indices = list(zip(tensor_chunks, chunk_indices)) + chunks_and_indices = sorted(chunks_and_indices, key=lambda tup: tup[1]) + ret_tensor = [chunk for chunk, _ in chunks_and_indices] + ret_tensor = torch.cat(ret_tensor, dim=seq_dim) + + ctx.seq_dim = seq_dim + ctx.cp_group = cp_group + # ctx.unpadded_seqlen = unpadded_seqlen + + return ret_tensor + + def 
backward(ctx, grad_output): + cp_size = torch.distributed.get_world_size(ctx.cp_group) + cp_rank = torch.distributed.get_rank(ctx.cp_group) + torch.distributed.all_reduce(grad_output, group=ctx.cp_group) + + # chunk the seqdim in 2*cp chunks, and select with a CP load balanced indexing + seq_dim = ctx.seq_dim + # if ctx.unpadded_seqlen is not None: + # # Zero out grad_output along the seq_dim after unpadded_seqlen + # slicer = [slice(None)] * grad_output.dim() + # slicer[seq_dim] = slice(ctx.unpadded_seqlen, None) + # grad_output[tuple(slicer)] = 0 + + grad_output = grad_output.view( + *grad_output.shape[0:seq_dim], + 2 * cp_size, + grad_output.shape[seq_dim] // (2 * cp_size), + *grad_output.shape[(seq_dim + 1) :], + ) + + index = torch.tensor( + [cp_rank, (2 * cp_size - cp_rank - 1)], device="cpu", pin_memory=True + ).cuda(non_blocking=True) + + grad_input = grad_output.index_select(seq_dim, index) + grad_input = grad_input.view( + *grad_input.shape[0:seq_dim], -1, *grad_input.shape[(seq_dim + 2) :] + ) + + return grad_input, None, None # , None diff --git a/nemo_rl/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py index f668834f19..cc069be65d 100644 --- a/nemo_rl/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -41,7 +41,7 @@ from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM -from nemo_rl.distributed.model_utils import from_parallel_logits_to_logprobs +from nemo_rl.distributed.model_utils import dtensor_from_parallel_logits_to_logprobs from nemo_rl.models.policy.utils import import_class_from_path @@ -621,8 +621,10 @@ def get_logprobs_from_vocab_parallel_logits( Args: vocab_parallel_logits (DTensor): Logits distributed across tensor parallel workers, with shape [batch_size, seq_len, vocab_size/tp_size]. 
- input_ids (torch.Tensor): Input token IDs for which to compute log probabilities, + input_ids (torch.Tensor | DTensor): Input token IDs for which to compute log probabilities, with shape [batch_size, seq_len]. + seq_index (Optional[torch.Tensor]): Sequence index for the input IDs, + with shape [sequence_length]. Returns: torch.Tensor: Log probabilities for the given input IDs. @@ -641,7 +643,7 @@ def get_logprobs_from_vocab_parallel_logits( vocab_interval_per_rank = vocab_parallel_logits.shape[-1] // tp_size - return from_parallel_logits_to_logprobs( + return dtensor_from_parallel_logits_to_logprobs( vocab_parallel_logits.to_local(), input_ids, vocab_interval_per_rank * tp_rank, diff --git a/nemo_rl/models/huggingface/common.py b/nemo_rl/models/huggingface/common.py index df913f95b4..c057f6d89a 100644 --- a/nemo_rl/models/huggingface/common.py +++ b/nemo_rl/models/huggingface/common.py @@ -12,10 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from dataclasses import dataclass from enum import Enum, auto +from typing import Optional, Tuple, TypeVar +import torch from transformers import AutoConfig +Tensor = TypeVar("Tensor", bound=torch.Tensor) + + +@dataclass +class FlashAttentionKwargs: + """Dataclass to hold FlashAttention v2 kwargs.""" + + cu_seqlens_q: Tensor + cu_seqlens_k: Tensor + max_seqlen_q: int + max_seqlen_k: int + class ModelFlag(Enum): """Enum that defines special flags for model-specific behaviors. @@ -53,3 +68,241 @@ def is_gemma_model(model_name: str) -> bool: "gemma3", "gemma3_text", ] + + +def group_and_cat_tensors( + tensors: list[torch.Tensor], + group_sizes: list[int], + padding_value: int = 0, + min_seq_len: int = 0, +) -> torch.Tensor: + """Groups and concatenates tensors according to group_sizes, then pads them to form a 2D tensor. 
+ + Each group of 1D tensors is concatenated into a single 1D tensor, and all resulting + group tensors are padded to the same length and stacked into a 2D tensor. + + Args: + tensors: List of 1D tensors of varying lengths. + group_sizes: List of integers. Each integer specifies how many tensors to group. + padding_value: Integer used to pad shorter sequences. + min_seq_len: Minimum sequence length. + + Returns: + A 2D tensor where each row is a padded concatenation of the grouped tensors. + + Example: + >>> tensors = [ + ... torch.tensor([1, 2]), + ... torch.tensor([3]), + ... torch.tensor([4, 5, 6]), + ... torch.tensor([7]) + ... ] + >>> group_sizes = [2, 2] + >>> group_and_cat_tensors(tensors, group_sizes, padding_value=-1) + tensor([[ 1, 2, 3, -1, -1], + [ 4, 5, 6, 7, -1]]) + """ + grouped = [] + index = 0 + for size in group_sizes: + group = tensors[index : index + size] + concat = torch.cat(group, dim=0) + grouped.append(concat) + index += size + + # Compute the maximum length for padding + max_len = max(t.size(0) for t in grouped) + max_len = max(max_len, min_seq_len) + + # Pad each tensor to max_len + padded = torch.stack( + [ + torch.nn.functional.pad(t, (0, max_len - t.size(0)), value=padding_value) + for t in grouped + ] + ) + + return padded + + +def pack_sequences( + input_ids: torch.Tensor, + input_lengths: torch.Tensor, + packed_sequence_size: list[int], + padding_value: int = 0, + return_attention_mask: bool = True, + min_seq_len: int = 0, +) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """Packs sequences into rows where each row concatenates multiple sequences. + + Useful for sequence packing in transformer models (e.g. for SFT training). Returns: + packed input_ids, packed position_ids, and optional attention_mask. 
+ + Args: + input_ids (torch.Tensor): Tensor of shape [num_sequences, max_seq_len] + input_lengths (torch.Tensor): Tensor of shape [num_sequences], containing true lengths + packed_sequence_size (List[int]): How many sequences to pack per row + padding_value (int): Pad value for input_ids + return_attention_mask (bool): Whether to return per-row causal attention mask + min_seq_len (int): Minimum sequence length. + + Returns: + Tuple: + input_ids_packed (torch.Tensor): [batch_size, max_packed_seq_len] + position_ids_packed (torch.Tensor): [batch_size, max_packed_seq_len] + attention_mask (Optional[torch.Tensor]): [batch_size, max_len, max_len] if requested + + Example: + >>> input_ids = torch.tensor([ + ... [1, 2, 0, 0], # len 2 + ... [3, 4, 5, 0], # len 3 + ... [6, 0, 0, 0], # len 1 + ... [7, 8, 9, 9], # len 4 + ... [8, 7, 0, 0], # len 2 + ... [6, 0, 0, 0], # len 1 + ... [5, 4, 3, 0], # len 3 + ... ]) + >>> input_lengths = torch.tensor([2, 3, 1, 4, 2, 1, 3]) + >>> packed_sequence_size = [3, 4] + >>> input_ids_packed, position_ids_packed, attention_mask = pack_sequences( + ... input_ids, input_lengths, packed_sequence_size, padding_value=-1, return_attention_mask=True + ... 
) + >>> input_ids_packed + tensor([ + [ 1, 2, 3, 4, 5, 6, -1, -1, -1, -1], + [ 7, 8, 9, 9, 8, 7, 6, 5, 4, 3] + ]) + >>> position_ids_packed + tensor([ + [0, 1, 0, 1, 2, 0, 0, 0, 0, 0], + [0, 1, 2, 3, 0, 1, 0, 0, 1, 2] + ]) + >>> attention_mask[0] + tensor([ + [ True, True, False, False, False, False, False, False, False, False], + [False, False, True, True, True, False, False, False, False, False], + [False, False, False, False, False, True, False, False, False, False], + [False, False, False, False, False, False, False, False, False, False], + ]) + >>> attention_mask[1] + tensor([ + [ True, True, True, True, False, False, False, False, False, False], + [False, False, False, False, True, True, True, False, False, False], + [False, False, False, False, False, False, True, True, True, True], + [False, False, False, False, False, False, False, True, True, True], + ]) + """ + flat_input_ids = [] + position_ids = [] + flat_lengths = input_lengths.tolist() + + for i, seq_len in enumerate(flat_lengths): + flat_input_ids.append(input_ids[i, :seq_len]) + position_ids.append( + torch.arange(seq_len, dtype=torch.long, device=input_ids.device) + ) + + # Group and pad + input_ids_packed = group_and_cat_tensors( + flat_input_ids, packed_sequence_size, padding_value, min_seq_len=min_seq_len + ) + position_ids_packed = group_and_cat_tensors( + position_ids, packed_sequence_size, padding_value=0, min_seq_len=min_seq_len + ) + + # Compute max length + batch_size, max_seq_len = input_ids_packed.shape + + attention_mask = None + if return_attention_mask: + attention_mask = torch.zeros( + (batch_size, max_seq_len, max_seq_len), + dtype=torch.bool, + device=input_ids.device, + ) + index = 0 + for i, group_size in enumerate(packed_sequence_size): + group_lengths = flat_lengths[index : index + group_size] + total_len = sum(group_lengths) + attention_mask[i, :total_len, :total_len] = torch.tril( + torch.ones( + (total_len, total_len), dtype=torch.bool, device=input_ids.device + ) + ) + 
index += group_size + + return input_ids_packed, position_ids_packed, attention_mask + + +# TODO(ahmadki): the function doesn't actually handle returning 2D tensors because none of the backends support this. +# but we should support this anyways +def unpack_tensor(tensor, input_lengths): + """Unpacks a packed tensor into individual sequences padded to the same length. + + Args: + tensor (torch.Tensor): Packed tensor of shape [batch_size, packed_seq_len]. + packed_lengths (List[int]): Original sequence lengths in the order they were packed. + + Returns: + torch.Tensor: [num_sequences, max_seq_len], each row is one unpacked and padded sequence. + + Example: + >>> packed_tensor = torch.tensor([ + ... [1, 2, 3, 4, 5, 6, -1, -1], + ... [7, 8, 9, 9, 8, 7, 6, -1] + ... ]) + >>> packed_lengths = [2, 3, 1, 4, 2] + >>> unpack_tensor(packed_tensor, packed_lengths) + tensor([ + [1, 2, 0, 0], + [3, 4, 5, 0], + [6, 0, 0, 0], + [7, 8, 9, 9], + [8, 7, 0, 0], + ]) + """ + packed_seqlen = tensor.shape[1] + splitsizes = input_lengths.tolist() + splitsizes.append(packed_seqlen - sum(splitsizes)) + tensor_split = torch.split(tensor, tuple(splitsizes), dim=1) + + max_len = max(input_lengths.tolist()) # max sequence length in the batch + + tensor_stacked = [] + for t in tensor_split[0:-1]: + padding_needed = max_len - t.shape[1] + tensor_stacked.append( + torch.nn.functional.pad( + t, (0, 0, 0, padding_needed), mode="constant", value=0.0 + ) + ) + return torch.cat(tensor_stacked, dim=0) + + +def get_flash_attention_kwargs(input_lengths: torch.Tensor) -> FlashAttentionKwargs: + """Returns kwargs required for FlashAttention v2 forward functions. 
+ + Args: + input_lengths (torch.Tensor): [batch_size] containing lengths of each sequence + + Returns: + Dict[str, torch.Tensor | int]: + { + "cu_seqlens_q": Tensor[int32], + "cu_seqlens_k": Tensor[int32], + "max_seqlen_q": int, + "max_seqlen_k": int + } + """ + input_lengths_int32 = input_lengths.to(torch.int32) + cu_seqlens = torch.nn.functional.pad( + input_lengths_int32.cumsum(dim=0), (1, 0) + ) # prepend 0 + max_len = input_lengths.max().item() + + return FlashAttentionKwargs( + cu_seqlens_q=cu_seqlens, + cu_seqlens_k=cu_seqlens.clone(), # same for self-attention + max_seqlen_q=max_len, + max_seqlen_k=max_len, + ) diff --git a/nemo_rl/models/megatron/common.py b/nemo_rl/models/megatron/common.py index 5c6431b15e..bc0d499f08 100644 --- a/nemo_rl/models/megatron/common.py +++ b/nemo_rl/models/megatron/common.py @@ -12,20 +12,240 @@ # See the License for the specific language governing permissions and # limitations under the License. from functools import partial -from typing import Any, Iterator +from typing import Any, Iterator, Optional import torch import torch.distributed as dist from megatron.core.models.gpt import GPTModel +from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.parallel_state import ( + get_context_parallel_group, + get_context_parallel_rank, + get_context_parallel_world_size, get_tensor_model_parallel_group, get_tensor_model_parallel_rank, ) from megatron.training.utils import get_ltor_masks_and_position_ids from nemo.tron.state import GlobalState -from nemo_rl.algorithms.loss_functions import LossFunction +from nemo_rl.algorithms.loss_functions import LossFunction, SequencePackingLossWrapper from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.model_utils import _get_tokens_on_this_cp_rank + + +def _pack_sequences_for_megatron( + input_ids: torch.Tensor, + seq_lengths: torch.Tensor, + pad_individual_seqs_to_multiple_of: int = 1, + pad_packed_seq_to: Optional[int] = None, + 
cp_rank: int = 0, + cp_size: int = 1, +) -> tuple[torch.Tensor, PackedSeqParams, torch.Tensor, Optional[torch.Tensor]]: + """Pack sequences for Megatron model processing with optional context parallelism. + + Args: + input_ids: Input token IDs [batch_size, seq_length] + seq_lengths: Actual sequence lengths for each sample [batch_size] + pad_individual_seqs_to_multiple_of: Pad individual sequences to a multiple of this value + pad_packed_seq_to: Pad packed sequences to this value (before CP) + cp_size: Context parallelism size + + Returns: + Tuple of: + - packed_input_ids: Packed input tensor [1, T] + - input_ids_cp_sharded: Sharded input tensor [cp_size, T // cp_size] + - packed_seq_params: PackedSeqParams object + - cu_seqlens: Cumulative sequence lengths + - cu_seqlens_padded: Padded cumulative sequence lengths + """ + batch_size = input_ids.shape[0] + + # Build cumulative sequence lengths (cu_seqlens) and extract valid tokens + cu_seqlens = [0] + cu_seqlens_padded = ( + [0] + if pad_individual_seqs_to_multiple_of > 1 or pad_packed_seq_to is not None + else None + ) + valid_tokens = [] + + pad_factor = pad_individual_seqs_to_multiple_of + + for b in range(batch_size): + seq_len = ( + seq_lengths[b].item() if torch.is_tensor(seq_lengths[b]) else seq_lengths[b] + ) + + # Extract valid tokens for this sequence + valid_tokens.append(input_ids[b, :seq_len]) + + # Update cumulative sequence lengths + cu_seqlens.append(cu_seqlens[-1] + seq_len) + + # For context parallelism, track padded sequence lengths + if pad_factor > 1 or pad_packed_seq_to is not None: + # Pad sequence length to multiple of (cp_size * 2) + padded_seq_len = ((seq_len + pad_factor - 1) // pad_factor) * pad_factor + cu_seqlens_padded.append(cu_seqlens_padded[-1] + padded_seq_len) + + # Convert to tensors + cu_seqlens = torch.tensor(cu_seqlens, dtype=torch.int32, device=input_ids.device) + if pad_factor > 1 or pad_packed_seq_to is not None: + cu_seqlens_padded = torch.tensor( + cu_seqlens_padded, 
dtype=torch.int32, device=input_ids.device + ) + if pad_packed_seq_to is not None: + cu_seqlens_padded[-1] = pad_packed_seq_to + + # Calculate max sequence length (padded if using CP) + if pad_factor > 1 or (pad_packed_seq_to is not None): + seq_lens_padded = cu_seqlens_padded[1:] - cu_seqlens_padded[:-1] + max_seqlen = seq_lens_padded.max().item() + else: + seq_lens = cu_seqlens[1:] - cu_seqlens[:-1] + max_seqlen = seq_lens.max().item() + + # Concatenate all valid tokens + # If using individual padding, we need to pad individual sequences + # CP will always need padding (of at least cp_size * 2) + running_seq_len = 0 + if pad_factor > 1: + all_input_ids = [] + padded_tokens = [] + for b in range(batch_size): + seq_len = ( + seq_lengths[b].item() + if torch.is_tensor(seq_lengths[b]) + else seq_lengths[b] + ) + # if last element, pad to the max sequence length + if b == batch_size - 1 and pad_packed_seq_to is not None: + padded_seq_len = pad_packed_seq_to - running_seq_len + running_seq_len += padded_seq_len + else: + padded_seq_len = ((seq_len + pad_factor - 1) // pad_factor) * pad_factor + + running_seq_len += padded_seq_len + + # Pad this sequence to the required length + seq_tokens = input_ids[b, :seq_len] + if padded_seq_len > seq_len: + # Pad with zeros (or use a padding token if available) + seq_tokens = torch.nn.functional.pad( + seq_tokens, (0, padded_seq_len - seq_len), value=0 + ) + all_input_ids.append(seq_tokens) + + if cp_size > 1: + seq_tokens = _get_tokens_on_this_cp_rank( + seq_tokens, cp_rank, cp_size, seq_dim=0 + ) + + padded_tokens.append(seq_tokens) + + # Concatenate all padded tokens + # For 'thd' format, the shape should be [1, T] where T is total tokens + packed_input_ids = torch.cat(padded_tokens, dim=0).unsqueeze(0) + all_input_ids = torch.cat(all_input_ids, dim=0).unsqueeze(0) + else: + # No individual padding, just concatenate valid tokens + # For 'thd' format, the shape should be [1, T] where T is total tokens + packed_input_ids = 
torch.cat(valid_tokens, dim=0).unsqueeze(0) + all_input_ids = packed_input_ids + if pad_packed_seq_to is not None: + pad_len = pad_packed_seq_to - packed_input_ids.shape[1] + if pad_len > 0: + packed_input_ids = torch.nn.functional.pad( + packed_input_ids, (0, pad_len), value=0 + ) + all_input_ids = torch.nn.functional.pad( + all_input_ids, (0, pad_len), value=0 + ) + + if cu_seqlens_padded is None: + cu_seqlens_padded = cu_seqlens.clone() + + packed_seq_params = PackedSeqParams( + cu_seqlens_q=cu_seqlens_padded, + cu_seqlens_kv=cu_seqlens_padded, + cu_seqlens_q_padded=cu_seqlens_padded, + cu_seqlens_kv_padded=cu_seqlens_padded, + max_seqlen_q=int(max_seqlen), + max_seqlen_kv=int(max_seqlen), + qkv_format="thd", + ) + + return ( + all_input_ids.contiguous(), + packed_input_ids.contiguous(), + packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) + + +def _unpack_sequences_from_megatron( + output_tensor: torch.Tensor, + seq_lengths: torch.Tensor, + cu_seqlens: torch.Tensor, + cu_seqlens_padded: Optional[torch.Tensor], + original_batch_size: int, + original_seq_length: int, +) -> torch.Tensor: + """Unpack sequences from Megatron output format. 
+ + Args: + output_tensor: Packed output tensor [1, T, vocab_size] + seq_lengths: Actual sequence lengths for each sample + cu_seqlens: Cumulative sequence lengths + cu_seqlens_padded: Padded cumulative sequence lengths (if CP was used) + original_batch_size: Original batch size + original_seq_length: Original maximum sequence length + + Returns: + Unpacked output tensor [batch_size, seq_length, vocab_size] + """ + # Remove the batch dimension to get [T, vocab_size] + output_tensor = output_tensor.squeeze(0) + + # Create a padded output tensor with original shape + vocab_size = output_tensor.shape[-1] + unpacked_output = torch.zeros( + (original_batch_size, original_seq_length, vocab_size), + dtype=output_tensor.dtype, + device=output_tensor.device, + ) + + # Get context parallel size to determine which cu_seqlens to use + cp_size = get_context_parallel_world_size() + + # Fill in the unpacked output tensor with valid tokens + for b in range(original_batch_size): + # Get actual sequence length for this sample + seq_len = ( + seq_lengths[b].item() if torch.is_tensor(seq_lengths[b]) else seq_lengths[b] + ) + + if cp_size > 1 and cu_seqlens_padded is not None: + # When using CP, we need to account for padding + # Calculate the padded sequence boundaries + pad_factor = cp_size * 2 + padded_seq_len = ((seq_len + pad_factor - 1) // pad_factor) * pad_factor + start_idx = cu_seqlens_padded[b].item() + + # Only copy the valid tokens (not the padding) + unpacked_output[b, :seq_len] = output_tensor[ + start_idx : start_idx + seq_len + ] + else: + # No CP, use regular cu_seqlens + start_idx = cu_seqlens[b].item() + end_idx = cu_seqlens[b + 1].item() + + # Copy the valid tokens to the unpacked tensor + unpacked_output[b, :seq_len] = output_tensor[start_idx:end_idx] + + return unpacked_output def forward_step_arbitrary_loss( @@ -35,6 +255,11 @@ def forward_step_arbitrary_loss( data_iterator: Iterator[BatchedDataDict[Any]], model: GPTModel, loss_fn: LossFunction, + pack_sequences: 
bool = False, + seq_length_key: Optional[str] = None, + pad_individual_seqs_to_multiple_of: int = 1, + pad_full_seq_to: Optional[int] = None, + cp_normalize: bool = True, ): """Forward training step with support for packed sequences and context parallelism. @@ -45,30 +270,111 @@ def forward_step_arbitrary_loss( data_iterator: Input data iterator model (GPTModel): The GPT Model loss_fn (LossFunction): Loss function to apply + pack_sequences (bool): Whether to pack sequences for efficiency + seq_length_key (Optional[str]): Key in data_dict containing actual sequence lengths + cp_normalize (bool): Whether to normalize the loss by the cp_size + + Notes on packed sequences with context parallelism (CP): + - When CP > 1, each sequence is padded to a multiple of (cp_size * 2) + - The factor of 2 ensures load balancing for causal attention + - cu_seqlens tracks actual sequence boundaries + - cu_seqlens_padded tracks padded sequence boundaries for CP + - Requires TransformerEngine >= 1.10 for CP support """ straggler_timer = state.straggler_timer with straggler_timer(bdata=True): data_dict = next(data_iterator).to("cuda") input_ids = data_dict["input_ids"] - attention_mask, _, position_ids = get_ltor_masks_and_position_ids( - input_ids, 0, False, False, False - ) + attention_mask = None + position_ids = None + packed_seq_params = None + + original_batch_size = input_ids.shape[0] + original_seq_length = input_ids.shape[1] + seq_lengths = None # Will be set if using packed sequences + cu_seqlens = None + cu_seqlens_padded = None + + if pack_sequences: + # For packed sequences with padded input, we need sequence lengths + assert seq_length_key is not None, ( + "seq_length_key must be provided for packed sequences" + ) + assert seq_length_key in data_dict, ( + f"{seq_length_key} not found in data_dict" + ) + + # Get sequence lengths and context parallel size + seq_lengths = data_dict[seq_length_key] + + # Pack sequences + ( + input_ids, + input_ids_cp_sharded, + 
packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) = _pack_sequences_for_megatron( + input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of, + pad_full_seq_to, + cp_rank=get_context_parallel_rank(), + cp_size=get_context_parallel_world_size(), + ) + + # For packed sequences, position_ids and attention_mask are typically None + # The PackedSeqParams handles all necessary sequence information + position_ids = None + attention_mask = None + else: + input_ids_cp_sharded = input_ids + attention_mask, _, position_ids = get_ltor_masks_and_position_ids( + input_ids, 0, False, False, False + ) with straggler_timer: - output_tensor = model(input_ids, position_ids, attention_mask) + output_tensor = model( + input_ids_cp_sharded, + position_ids, + attention_mask, + packed_seq_params=packed_seq_params, + ) + + # Unpack the output tensor if we did packed sequences + if pack_sequences and packed_seq_params is not None: + # remove padding + loss_fn = SequencePackingLossWrapper( + loss_fn=loss_fn, + cu_seqlens_q=packed_seq_params.cu_seqlens_q, + cu_seqlens_q_padded=packed_seq_params.cu_seqlens_q_padded, + ) loss_data = data_dict - return output_tensor, partial( + loss_fn_wrapped = partial( loss_fn, data=loss_data, global_valid_seqs=global_valid_seqs, global_valid_toks=global_valid_toks, vocab_parallel_rank=get_tensor_model_parallel_rank(), vocab_parallel_group=get_tensor_model_parallel_group(), + context_parallel_group=get_context_parallel_group(), ) + if cp_normalize: + cp_size = get_context_parallel_world_size() + orig_loss_fn_wrapped = loss_fn_wrapped + + def _div_by_cp_size(*args, **kwargs): + loss, metrics = orig_loss_fn_wrapped(*args, **kwargs) + return loss / cp_size, metrics + + loss_fn_wrapped = _div_by_cp_size + + return output_tensor, loss_fn_wrapped + def broadcast_tensor( tensor: torch.Tensor | None, src_rank: int, group: dist.ProcessGroup diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index 
f501c978c5..61dfe9b51c 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -14,6 +14,7 @@ import contextlib import gc +import itertools import os from collections import defaultdict from contextlib import AbstractContextManager, contextmanager, nullcontext @@ -40,6 +41,7 @@ from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM from nemo_rl.algorithms.interfaces import LossFunction, LossType +from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.models.dtensor.parallelize import ( _parallelize_model, @@ -48,7 +50,11 @@ get_logprobs_from_vocab_parallel_logits, to_local_if_dtensor, ) -from nemo_rl.models.huggingface.common import ModelFlag +from nemo_rl.models.huggingface.common import ( + ModelFlag, + get_flash_attention_kwargs, + pack_sequences, +) from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import ( LogprobOutputSpec, @@ -170,6 +176,14 @@ def __init__( else: raise ValueError(f"Unknown precision: {self.cfg['precision']}") + print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") + self.enable_seq_packing = self.cfg["sequence_packing"]["enabled"] + if self.enable_seq_packing: + print( + f"[Rank {self.rank}] Sequence packing is enabled for model {model_name}" + ) + print(f"[Rank {self.rank}] Using FlashAttention2 for sequence packing") + model_config = AutoConfig.from_pretrained( model_name, # Always load the model in float32 to keep master weights in float32. 
@@ -179,6 +193,9 @@ def __init__( **sliding_window_overwrite( model_name ), # due to https://github.com/huggingface/transformers/issues/38002 + attn_implementation="flash_attention_2" + if self.enable_seq_packing + else None, ) full_state_dict = None @@ -216,6 +233,10 @@ def __init__( tp_size = self.cfg["dtensor_cfg"]["tensor_parallel_size"] cp_size = self.cfg["dtensor_cfg"]["context_parallel_size"] + if cp_size > 1 and self.enable_seq_packing: + raise ValueError( + "Context parallel is not supported for sequence packing. Refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details." + ) dp_size = world_size // tp_size // cp_size sequence_parallel_enabled = self.cfg["dtensor_cfg"]["sequence_parallel"] assert world_size == dp_size * tp_size * cp_size, ( @@ -463,8 +484,13 @@ def train( if mbs is None: mbs = self.cfg["train_micro_batch_size"] local_gbs = gbs // self.dp_size - dataset_size = data["input_ids"].shape[0] - num_global_batches = dataset_size // local_gbs + total_dataset_size = torch.tensor(data.size, device="cuda") + torch.distributed.all_reduce( + total_dataset_size, + op=torch.distributed.ReduceOp.SUM, + group=self.dp_mesh.get_group(), + ) + num_global_batches = int(total_dataset_size.item()) // gbs # dim 1 is always assumed to be the sequence dim, sanity check this here sequence_dim = 1 @@ -489,10 +515,8 @@ def train( losses = [] all_mb_metrics = [] - for gb_idx, gb_start in enumerate(range(0, dataset_size, local_gbs)): - global_batch: BatchedDataDict[Any] = data.slice( - gb_start, gb_start + local_gbs - ) + for gb_idx in range(num_global_batches): + global_batch = data.get_batch(batch_idx=gb_idx, batch_size=local_gbs) assert "sample_mask" in global_batch, ( "sample_mask must be present in the data!" 
@@ -528,32 +552,72 @@ def train( # Calculate number of microbatches to process # make_microbatch_iterator assumes that the batch size is a multiple of the microbatch size # so its safe to not check for the case where the last data slice is smaller than mbs + dummy_iterator = iter([]) if self.cfg["dynamic_batching"]["enabled"]: mb_iterator = batch.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = batch.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = ( + batch.make_microbatch_iterator_for_packable_sequences() + ) + iterator_len, max_seqlen = ( + batch.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. + dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = ( + batch.make_microbatch_iterator_for_packable_sequences() + ) + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) else: mb_iterator = batch.make_microbatch_iterator(mbs) + iterator_len = batch.size // mbs - for mb in mb_iterator: - input_ids = mb.get("input_ids").cuda() - input_lengths = mb.get("input_lengths") - batch_size, seq_len = input_ids.shape + for mb_idx, mb in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + with torch.autocast(device_type="cuda", dtype=self.dtype): + if self.enable_seq_packing: + input_ids = mb.get("input_ids").cuda() + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=mb["input_lengths"], + packed_sequence_size=[ + len(mb["input_lengths"]) + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + 
min_seq_len=self.cfg["sequence_packing"][ + "train_mb_tokens" + ], # TODO: this is a WAR for sequence packing, we should fix this. Without this, backward will fail when TP is enabled. + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=mb["input_lengths"], + ) - attention_mask = torch.zeros( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device - ) - for i, length in enumerate(input_lengths): - # For right-padded sequence, set 1s at the beginning of the sequence - attention_mask[i, :length] = 1 + else: + input_ids = mb.get("input_ids").cuda() + batch_size, seq_len = input_ids.shape - with torch.autocast(device_type="cuda", dtype=self.dtype): - attention_mask_input_all_ones = torch.ones( - (batch_size, seq_len), - dtype=torch.long, - device=input_ids.device, - ) - position_ids = torch.arange( - seq_len, device=input_ids.device - ).repeat(batch_size, 1) + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.long, + device=input_ids.device, + ) + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + flash_attn_kwargs = {} context_parallel_ctx = None if self.cp_size > 1: @@ -578,9 +642,10 @@ def train( with torch.autocast(device_type="cuda", dtype=self.dtype): outputs = self.model( input_ids=input_ids, - attention_mask=attention_mask_input_all_ones, + attention_mask=attention_mask, position_ids=position_ids, use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, ) # Get logprobs @@ -648,18 +713,34 @@ def train( placements=[Shard(sequence_dim), Shard(-1)], ) - loss, loss_metrics = loss_fn( - logits, mb, global_valid_seqs, global_valid_toks + if self.enable_seq_packing: + loss_fn_ = SequencePackingLossWrapper( + loss_fn=loss_fn, + cu_seqlens_q=flash_attn_kwargs.cu_seqlens_q, + cu_seqlens_q_padded=flash_attn_kwargs.cu_seqlens_q, + ) + else: + loss_fn_ = loss_fn + + loss, loss_metrics = loss_fn_( + logits, + mb, + global_valid_seqs, + 
global_valid_toks, ) - ## scale by the number of global batches so we get the correct - ## value when summing metrics across all microbatches - for k in loss_metrics.keys(): - loss_metrics[k] /= num_global_batches - num_valid_samples = loss_metrics["num_valid_samples"] - loss_metrics["lr"] = self.optimizer.param_groups[0]["lr"] - loss_metrics["global_valid_seqs"] = global_valid_seqs.item() - loss_metrics["global_valid_toks"] = global_valid_toks.item() + # skip the update for dummy batches + if mb_idx < iterator_len: + ## scale by the number of global batches so we get the correct + ## value when summing metrics across all microbatches + for k in loss_metrics.keys(): + loss_metrics[k] /= num_global_batches + num_valid_samples = loss_metrics["num_valid_samples"] + loss_metrics["lr"] = self.optimizer.param_groups[0]["lr"] + loss_metrics["global_valid_seqs"] = global_valid_seqs.item() + loss_metrics["global_valid_toks"] = global_valid_toks.item() + else: + loss *= 0 # Backward pass if not eval_mode: @@ -762,29 +843,70 @@ def get_logprobs( with unshard_fsdp2_model(self.model), torch.no_grad(): data.to("cuda") + dummy_iterator = iter([]) if self.cfg["dynamic_batching"]["enabled"]: mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. 
+ dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) else: mb_iterator = data.make_microbatch_iterator(logprob_batch_size) + iterator_len = data.size // logprob_batch_size - for lp_batch in mb_iterator: + step = 0 + for batch_idx, lp_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + step += 1 input_ids = lp_batch.get("input_ids").cuda() input_lengths = lp_batch.get("input_lengths") batch_size, seq_len = input_ids.shape - # Create attention mask for right-padded data - attention_mask = torch.zeros( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device - ) - for i, length in enumerate(input_lengths): - # For right-padded sequence, set 1s at the beginning of the sequence - attention_mask[i, :length] = 1 - - # explicitly create position ids for the input, otherwise the sharding - # for DTensor will be incorrect - position_ids = torch.arange(seq_len, device=input_ids.device).repeat( - batch_size, 1 - ) + if self.enable_seq_packing: + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Create attention mask for right-padded data + attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.long, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + # For right-padded sequence, set 1s at the beginning of the sequence + attention_mask[i, :length] = 1 + + # explicitly create position ids for the input, otherwise the sharding + # for DTensor will be incorrect + position_ids = 
torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + flash_attn_kwargs = {} with torch.autocast(device_type="cuda", dtype=self.dtype): # DTensor requires the casual attention kernel to hit, @@ -795,41 +917,128 @@ def get_logprobs( (batch_size, seq_len), dtype=torch.long, device=input_ids.device ) - outputs = self.model( - input_ids=input_ids, - attention_mask=attention_mask_input_all_ones, - position_ids=position_ids, - use_cache=False, + context_parallel_ctx = None + if self.cp_size > 1: + seq_index = torch.arange(seq_len, device=input_ids.device).repeat( + 1, 1 ) - - if isinstance(outputs.logits, DTensor): - token_logprobs = get_logprobs_from_vocab_parallel_logits( - outputs.logits.to(torch.float32), input_ids + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = self.create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), ) - else: - # Extract logprobs for each token in the sequence by gathering the logprob - # corresponding to the next token at each position - # Input shapes: - # log_probs: [batch_size, sequence_length, vocab_size] - logits for each position - # token_ids: [batch_size, sequence_length] - actual tokens - # Output shape: [batch_size, sequence_length] - logprob of each token given previous - # We get logprob of token[t+1] from logits[t], prepending 0 to maintain sequence length - - log_probs = torch.nn.functional.log_softmax( - outputs.logits.to(torch.float32), dim=-1 - ) - next_tokens = input_ids[:, 1:] - log_probs = log_probs[:, :-1] - token_logprobs = log_probs.gather( - dim=-1, index=next_tokens.unsqueeze(-1) - ).squeeze(-1) + + with DTensorPolicyWorker.train_context(context_parallel_ctx): + with torch.autocast(device_type="cuda", dtype=self.dtype): + outputs = self.model( + input_ids=input_ids, + attention_mask=attention_mask_input_all_ones, + 
position_ids=position_ids, + use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, + ) + + logits = outputs.logits + + if self.cp_size > 1: + seq_index_tensor = ( + DTensor.from_local( + seq_index, + device_mesh=self.cp_mesh, + placements=[Shard(1)], + ) + .full_tensor() + .squeeze(0) + ) + + input_ids_dtensor = DTensor.from_local( + input_ids, + device_mesh=self.cp_mesh, + placements=[Shard(sequence_dim)], + ) + + if isinstance(logits, DTensor): + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + + token_logprobs = get_logprobs_from_vocab_parallel_logits( + logits.to(torch.float32), + input_ids_dtensor, + seq_index_tensor, + ) + + assert token_logprobs.shape[1] == seq_len - 1 + else: + if isinstance(logits, DTensor): + token_logprobs = get_logprobs_from_vocab_parallel_logits( + logits.to(torch.float32), input_ids + ) + else: + # Extract logprobs for each token in the sequence by gathering the logprob + # corresponding to the next token at each position + # Input shapes: + # log_probs: [batch_size, sequence_length, vocab_size] - logits for each position + # token_ids: [batch_size, sequence_length] - actual tokens + # Output shape: [batch_size, sequence_length] - logprob of each token given previous + # We get logprob of token[t+1] from logits[t], prepending 0 to maintain sequence length + + log_probs = torch.nn.functional.log_softmax( + outputs.logits.to(torch.float32), dim=-1 + ) + next_tokens = input_ids[:, 1:] + log_probs = log_probs[:, :-1] + token_logprobs = log_probs.gather( + dim=-1, 
index=next_tokens.unsqueeze(-1) + ).squeeze(-1) token_logprobs = torch.cat( [torch.zeros_like(token_logprobs[:, :1]), token_logprobs], dim=1 ) - # Apply mask to zero out padding tokens logprobs - token_logprobs = token_logprobs * attention_mask + # skip keeping the logprobs for the dummy batches + if batch_idx >= iterator_len: + continue + + if not self.enable_seq_packing: + # Apply mask to zero out padding tokens logprobs + token_logprobs = token_logprobs * attention_mask + else: + # For packed sequences, unpack logprobs + unpacked_logprobs = torch.zeros( + (batch_size, seq_dim_size), + dtype=token_logprobs.dtype, + device=token_logprobs.device, + ) + cu_seqlens = flash_attn_kwargs.cu_seqlens_q + for i in range(batch_size): + start = cu_seqlens[i].item() + 1 + end = cu_seqlens[i + 1].item() + seq_len_actual = input_lengths[i].item() + unpacked_logprobs[i, 1:seq_len_actual] = token_logprobs[ + 0, start:end + ] + token_logprobs = unpacked_logprobs + all_log_probs.append(token_logprobs) # Concatenate all batches diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index c77f2460e7..dbbf5ddc1e 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -24,6 +24,7 @@ from nemo_rl.distributed.batched_data_dict import ( BatchedDataDict, DynamicBatchingArgs, + SequencePackingArgs, SlicedDataDict, ) from nemo_rl.distributed.named_sharding import NamedSharding @@ -140,9 +141,35 @@ def __init__( ], "max_tokens_per_microbatch": 0, # Override this in each different call (presumably different sizes) } + assert not config["sequence_packing"]["enabled"], ( + "Dynamic Batching is exclusive of Sequence Packing. Please disable Sequence Packing to use Dynamic Batching" + ) else: self.use_dynamic_batches = False + if config["sequence_packing"]["enabled"]: + assert ( + config["megatron_cfg"]["enabled"] or config["dtensor_cfg"]["enabled"] + ), "Sequence packing requires Megatron or DTensor policies." 
+ self.use_sequence_packing = True + self.sequence_packing_args: SequencePackingArgs = { + "train_mb_tokens": config["sequence_packing"]["train_mb_tokens"], + "logprob_mb_tokens": config["sequence_packing"].get( + "logprob_mb_tokens", None + ), + "algorithm": config["sequence_packing"]["algorithm"], + "input_key": "input_ids", + "input_lengths_key": "input_lengths", + "sequence_length_pad_multiple": (cp_size * 2 * tp_size) + if cp_size > 1 + else tp_size, + } + assert not config["dynamic_batching"]["enabled"], ( + "Sequence Packing is exclusive of Dynamic Batching. Please disable Dynamic Batching" + ) + else: + self.use_sequence_packing = False + self.cfg = config def init_collective( @@ -166,7 +193,6 @@ def get_logprobs( The logprob of input token i is specified at position i in the output logprobs tensor. """ dp_size = self.sharding_annotations.get_axis_size("data_parallel") - cp_size = self.sharding_annotations.get_axis_size("context_parallel") sharded_data: list[SlicedDataDict] unsorted_data_indices: list[int] @@ -175,32 +201,40 @@ def get_logprobs( "dynamic_batching" ]["logprob_mb_tokens"] sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore - cp_size * dp_size, + dp_size, batch_size=None, dynamic_batching_args=self.dynamic_batching_args, ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["logprob_mb_tokens"] + # we just shard into DP shards here as Sequence packing allows for CP. 
+ sharded_data, unsorted_data_indices = data.shard_by_batch_size( + dp_size, + batch_size=None, + sequence_packing_args=self.sequence_packing_args, + ) else: sharded_data = data.shard_by_batch_size( # type: ignore - cp_size * dp_size, + dp_size, batch_size=None, ) - sharded_data_2d = [] - shard_idx = 0 - # Convert to 2d dim array - for _ in range(dp_size): - cp_data = [] - for _ in range(cp_size): - cp_data.append(sharded_data[shard_idx]) - shard_idx += 1 - sharded_data_2d.append(cp_data) - futures = self.worker_group.run_all_workers_sharded_data( "get_logprobs", - data=sharded_data_2d, - in_sharded_axes=["data_parallel", "context_parallel"], - replicate_on_axes=["tensor_parallel", "pipeline_parallel"], - output_is_replicated=["tensor_parallel", "pipeline_parallel"], + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], ) logprobs: BatchedDataDict[LogprobOutputSpec] = BatchedDataDict.from_batches( self.worker_group.get_all_worker_results(futures) @@ -208,7 +242,7 @@ def get_logprobs( # dynamic batching sorts the inputs by sequence length to improve load balancing, # so change it back here - if self.use_dynamic_batches: + if self.use_dynamic_batches or self.use_sequence_packing: logprobs.reorder_data(unsorted_data_indices) return logprobs @@ -223,7 +257,6 @@ def get_reference_policy_logprobs( Returns: Identical to get_logprobs. 
""" dp_size = self.sharding_annotations.get_axis_size("data_parallel") - cp_size = self.sharding_annotations.get_axis_size("context_parallel") sharded_data: list[SlicedDataDict] unsorted_data_indices: list[int] if self.use_dynamic_batches: @@ -231,32 +264,39 @@ def get_reference_policy_logprobs( "dynamic_batching" ]["logprob_mb_tokens"] sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore - cp_size * dp_size, + dp_size, batch_size=None, dynamic_batching_args=self.dynamic_batching_args, ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["logprob_mb_tokens"] + sharded_data, unsorted_data_indices = data.shard_by_batch_size( + dp_size, + batch_size=None, + sequence_packing_args=self.sequence_packing_args, + ) else: sharded_data = data.shard_by_batch_size( # type: ignore - cp_size * dp_size, + dp_size, batch_size=None, ) - sharded_data_2d = [] - shard_idx = 0 - # Convert to 2d dim array - for _ in range(dp_size): - cp_data = [] - for _ in range(cp_size): - cp_data.append(sharded_data[shard_idx]) - shard_idx += 1 - sharded_data_2d.append(cp_data) - futures = self.worker_group.run_all_workers_sharded_data( "get_reference_policy_logprobs", - data=sharded_data_2d, - in_sharded_axes=["data_parallel", "context_parallel"], - replicate_on_axes=["tensor_parallel", "pipeline_parallel"], - output_is_replicated=["tensor_parallel", "pipeline_parallel"], + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], common_kwargs={"micro_batch_size": micro_batch_size}, ) logprobs: BatchedDataDict[ReferenceLogprobOutputSpec] = ( @@ -267,7 +307,7 @@ def get_reference_policy_logprobs( # dynamic batching sorts the inputs by sequence length to improve load balancing, # so change it back here - if 
self.use_dynamic_batches: + if self.use_dynamic_batches or self.use_sequence_packing: logprobs.reorder_data(unsorted_data_indices) return logprobs @@ -294,6 +334,15 @@ def train( batch_size=batch_size, dynamic_batching_args=self.dynamic_batching_args, ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["train_mb_tokens"] + sharded_data, _ = data.shard_by_batch_size( + dp_size, + batch_size=batch_size, + sequence_packing_args=self.sequence_packing_args, + ) else: sharded_data = data.shard_by_batch_size( dp_size, diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index b0d544aabb..867f27ea1d 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -39,6 +39,8 @@ from megatron.core.models.gpt import GPTModel from megatron.core.optimizer import ChainedOptimizer from megatron.core.parallel_state import ( + get_context_parallel_group, + get_context_parallel_rank, get_pipeline_model_parallel_group, get_pipeline_model_parallel_last_rank, get_pipeline_model_parallel_rank, @@ -86,7 +88,10 @@ from nemo_rl.algorithms.interfaces import LossFunction, LossType from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.model_utils import from_parallel_logits_to_logprobs +from nemo_rl.distributed.model_utils import ( + from_parallel_logits_to_logprobs, + from_parallel_logits_to_logprobs_packed_sequences, +) from nemo_rl.distributed.named_sharding import NamedSharding from nemo_rl.models.generation.interfaces import ( GenerationDatumSpec, @@ -94,6 +99,7 @@ verify_right_padding, ) from nemo_rl.models.megatron.common import ( + _pack_sequences_for_megatron, broadcast_tensor, forward_step_arbitrary_loss, ) @@ -452,12 +458,11 @@ def __init__( model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] model_cfg.context_parallel_size = 
self.cfg["megatron_cfg"][ "context_parallel_size" - ] # not supported right now - assert model_cfg.context_parallel_size == 1, ( - "Context parallel is not supported right now" - ) - - ## moe-related + ] + if model_cfg.context_parallel_size > 1: + assert self.cfg["sequence_packing"]["enabled"], ( + "Sequence Packing must be enabled to use Context Parallelism with MCore" + ) model_cfg.expert_tensor_parallel_size = self.cfg["megatron_cfg"][ "expert_tensor_parallel_size" ] @@ -568,7 +573,6 @@ def __init__( ), ) self.megatron_cfg.validate() - print(f"cfg: {self.megatron_cfg}") ( self.mcore_state, self.model, @@ -799,11 +803,32 @@ def train( ) batch = data.get_batch(batch_idx=gb_idx, batch_size=local_gbs) + pack_seqs = False + seqlen_key = None + pad_factor = 1 + pad_full_seq_to = None if self.cfg["dynamic_batching"]["enabled"]: data_iterator = batch.make_microbatch_iterator_with_dynamic_shapes() data_iterator_len = ( batch.get_microbatch_iterator_dynamic_shapes_len() ) + elif self.cfg["sequence_packing"]["enabled"]: + data_iterator = ( + batch.make_microbatch_iterator_for_packable_sequences() + ) + data_iterator_len, seq_dim_size = ( + batch.get_microbatch_iterator_for_packable_sequences_len() + ) + mbs = 1 + pack_seqs = True + seqlen_key = "input_lengths" + tp_size = self.cfg["megatron_cfg"]["tensor_model_parallel_size"] + cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] + pad_factor = cp_size * 2 * tp_size if cp_size > 1 else tp_size + if self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] > 1: + _, pad_full_seq_to = ( + batch.get_microbatch_iterator_for_packable_sequences_len() + ) else: data_iterator = batch.make_microbatch_iterator(mbs) data_iterator_len = local_gbs // mbs @@ -822,6 +847,10 @@ def train( self.mcore_state, global_valid_seqs, global_valid_toks, + pack_sequences=pack_seqs, + seq_length_key=seqlen_key, + pad_individual_seqs_to_multiple_of=pad_factor, + pad_full_seq_to=pad_full_seq_to, ), data_iterator=data_iterator, model=self.model, 
@@ -929,7 +958,6 @@ def train( } return metrics - # Temporary fix, 'data' is a kwarg due to some sort of ray bug def get_logprobs( self, *, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[LogprobOutputSpec]: @@ -969,33 +997,88 @@ def get_logprobs( pp_rank = get_pipeline_model_parallel_rank() pp_grp = get_pipeline_model_parallel_group() pp_size = get_pipeline_model_parallel_world_size() + cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] + # if pp_size > 1, we need to pad the full sequence to the max sequence length to maintain a static PP buffer + if ( + self.cfg["sequence_packing"]["enabled"] + and self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] > 1 + ): + _, pad_full_seq_to = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + pp_seq_dim_size = pad_full_seq_to + else: + pad_full_seq_to = None def forward_step_fn( data_iterator: Iterator[BatchedDataDict[Any]], model: GPTModel ): + nonlocal pad_full_seq_to data_dict = next(data_iterator).to("cuda") - input_ids = data_dict["input_ids"] - attention_mask, _, position_ids = get_ltor_masks_and_position_ids( - input_ids, 0, False, False, False - ) + if self.cfg["sequence_packing"]["enabled"]: + original_seq_length = data_dict["input_ids"].shape[1] + tp_size = self.cfg["megatron_cfg"]["tensor_model_parallel_size"] + pp_size = self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] + cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] + cp_rank = get_context_parallel_rank() + pad_factor = cp_size * 2 * tp_size if cp_size > 1 else tp_size + ( + input_ids, + input_ids_cp_sharded, + packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) = _pack_sequences_for_megatron( + data_dict["input_ids"].clone(), + data_dict["input_lengths"], + pad_individual_seqs_to_multiple_of=pad_factor, + pad_packed_seq_to=pad_full_seq_to, + cp_rank=cp_rank, + cp_size=cp_size, + ) + attention_mask, position_ids = None, None + unpacked_input_ids = 
data_dict["input_ids"] + else: + input_ids = data_dict["input_ids"] + input_ids_cp_sharded = input_ids + attention_mask, _, position_ids = get_ltor_masks_and_position_ids( + input_ids, 0, False, False, False + ) + packed_seq_params = None + unpacked_input_ids = input_ids output_tensor = model( - input_ids, + input_ids_cp_sharded, position_ids, attention_mask, + packed_seq_params=packed_seq_params, ) def collection_fn(output_tensor): + stc = time.time() tp_grp = get_tensor_model_parallel_group() tp_rank = get_tensor_model_parallel_rank() - token_logprobs = from_parallel_logits_to_logprobs( - output_tensor.to(torch.float32), - target=input_ids, - vocab_start_index=tp_rank * output_tensor.shape[-1], - vocab_end_index=(tp_rank + 1) * output_tensor.shape[-1], - tp_group=tp_grp, - inference_only=True, - ) + if self.cfg["sequence_packing"]["enabled"]: + token_logprobs = from_parallel_logits_to_logprobs_packed_sequences( + output_tensor, + target=input_ids, + cu_seqlens_padded=cu_seqlens_padded, + unpacked_seqlen=original_seq_length, + vocab_start_index=tp_rank * output_tensor.shape[-1], + vocab_end_index=(tp_rank + 1) * output_tensor.shape[-1], + group=tp_grp, + inference_only=True, + cp_group=get_context_parallel_group(), + ) + else: + token_logprobs = from_parallel_logits_to_logprobs( + output_tensor.to(torch.float32), + target=unpacked_input_ids, + vocab_start_index=tp_rank * output_tensor.shape[-1], + vocab_end_index=(tp_rank + 1) * output_tensor.shape[-1], + tp_group=tp_grp, + inference_only=True, + ) # Prepend 0 logprob for first token to maintain same sequence length as input token_logprobs = torch.cat( @@ -1010,10 +1093,17 @@ def collection_fn(output_tensor): if self.cfg["dynamic_batching"]["enabled"]: mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() data_iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + micro_batch_size = logprob_batch_size + elif self.cfg["sequence_packing"]["enabled"]: + mb_iterator = 
data.make_microbatch_iterator_for_packable_sequences() + data_iterator_len, _ = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + micro_batch_size = 1 else: mb_iterator = data.make_microbatch_iterator(logprob_batch_size) data_iterator_len = max(1, data.size // logprob_batch_size) - micro_batch_size = logprob_batch_size + micro_batch_size = logprob_batch_size forward_backward_func = get_forward_backward_func() list_of_logprobs = forward_backward_func( diff --git a/pyproject.toml b/pyproject.toml index 7480d6e590..75e41feba7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,6 @@ requires = ["setuptools>=42", "wheel"] build-backend = "setuptools.build_meta" - [tool.setuptools] packages = ["nemo_rl"] diff --git a/tests/functional/test_converter_roundtrip.py b/tests/functional/test_converter_roundtrip.py index 9679fcc724..c0c4b2fdd8 100644 --- a/tests/functional/test_converter_roundtrip.py +++ b/tests/functional/test_converter_roundtrip.py @@ -81,6 +81,7 @@ def create_test_config() -> Dict[str, Any]: "custom_parallel_plan": None, }, "dynamic_batching": {"enabled": False}, + "sequence_packing": {"enabled": False}, "make_sequence_length_divisible_by": 1, "max_grad_norm": 1.0, "optimizer": { diff --git a/tests/unit/algorithms/__init__.py b/tests/unit/algorithms/__init__.py index e69de29bb2..341a77c5bc 100644 --- a/tests/unit/algorithms/__init__.py +++ b/tests/unit/algorithms/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/algorithms/test_grpo.py b/tests/unit/algorithms/test_grpo.py index c81cf8a686..b387d1e2f0 100644 --- a/tests/unit/algorithms/test_grpo.py +++ b/tests/unit/algorithms/test_grpo.py @@ -77,17 +77,7 @@ def create_mock_batch( @pytest.fixture(scope="module") -def ray_init(): - """Initialize Ray for testing.""" - if not ray.is_initialized(): - ray.init(ignore_reinit_error=True) - yield - if ray.is_initialized(): - ray.shutdown() - - -@pytest.fixture(scope="module") -def mock_env(ray_init): +def mock_env(): """Create a mock environment for single task tests.""" env = MockEnvironment.remote(rewards=[1.0, 2.0]) yield env @@ -95,7 +85,7 @@ def mock_env(ray_init): @pytest.fixture(scope="module") -def mock_envs(ray_init): +def mock_envs(): """Create mock environments for multiple task tests.""" math_env = MockEnvironment.remote(rewards=[1.0, 2.0]) code_env = MockEnvironment.remote(rewards=[3.0, 4.0]) diff --git a/tests/unit/algorithms/test_sequence_packing_gradients.py b/tests/unit/algorithms/test_sequence_packing_gradients.py new file mode 100644 index 0000000000..33d858fbe4 --- /dev/null +++ b/tests/unit/algorithms/test_sequence_packing_gradients.py @@ -0,0 +1,449 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Test script to debug high gradients with sequence packing + context parallelism.""" + +import os + +import pytest +import ray +import torch + +from nemo_rl.algorithms.loss_functions import ( + ClippedPGLossFn, + SequencePackingLossWrapper, +) +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.named_sharding import NamedSharding +from nemo_rl.distributed.ray_actor_environment_registry import ( + ACTOR_ENVIRONMENT_REGISTRY, + PY_EXECUTABLES, +) +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup + + +@ray.remote(num_gpus=1) +class SequencePackingGradientTestActor: + def __init__(self, cp_size): + self.cp_size = cp_size + self.env_vars = dict(os.environ) + + def test_sequence_packing_gradients(self): + from nemo_rl.distributed.model_utils import _get_tokens_on_this_cp_rank + from nemo_rl.models.megatron.common import ( + _pack_sequences_for_megatron, + forward_step_arbitrary_loss, + ) + + # Initialize process group + torch.distributed.init_process_group(backend="nccl") + + rank = int(os.environ["RANK"]) + world_size = int(os.environ["WORLD_SIZE"]) + + # Create CP group - all ranks participate in CP + cp_group = torch.distributed.new_group(ranks=list(range(world_size))) + + # Patch get_context_parallel_group to always return cp_group + # (Assume it's imported from nemo_rl.models.megatron.common) + import megatron.core.parallel_state as parallel_state + + parallel_state._CONTEXT_PARALLEL_GROUP = cp_group + parallel_state._TENSOR_MODEL_PARALLEL_GROUP = torch.distributed.new_group( + ranks=[rank] + ) + + # Test parameters + batch_size = 4 + max_seq_len = 512 + vocab_size = 1000 + cp_size = self.cp_size + + # Ensure sequence length is compatible with CP load balancing + if max_seq_len % (2 * cp_size) != 0: + max_seq_len = (max_seq_len // (2 * cp_size) + 1) * (2 * cp_size) + + # Create test data with varying sequence lengths + 
torch.manual_seed(42) # For reproducibility + seq_lengths = torch.tensor( + [ + max_seq_len // 4, + max_seq_len * 1 // 4, + max_seq_len // 4, + max_seq_len * 3 // 4, + ] + ) + + # Create input data + input_ids = torch.zeros( + batch_size, max_seq_len, dtype=torch.long, device="cuda" + ) + token_mask = torch.zeros( + batch_size, max_seq_len, dtype=torch.float, device="cuda" + ) + + # Fill with random tokens up to seq_length + for i in range(batch_size): + length = seq_lengths[i] + input_ids[i, :length] = torch.randint( + 0, vocab_size, (length,), device="cuda" + ) + token_mask[i, :length] = 1.0 + + # Create other required tensors + sample_mask = torch.ones(batch_size, dtype=torch.float, device="cuda") + advantages = torch.randn(batch_size, max_seq_len, device="cuda") + prev_logprobs = torch.randn(batch_size, max_seq_len, device="cuda") + generation_logprobs = torch.randn(batch_size, max_seq_len, device="cuda") + reference_policy_logprobs = generation_logprobs.clone() + + original_data = { + "input_ids": input_ids, + "input_lengths": seq_lengths, + "token_mask": token_mask, + "sample_mask": sample_mask, + "advantages": advantages, + "prev_logprobs": prev_logprobs, + "generation_logprobs": generation_logprobs, + "reference_policy_logprobs": reference_policy_logprobs, + } + + # ===== TEST 1: Baseline (no sequence packing) ===== + print(f"Rank {rank}: Testing baseline (no sequence packing)") + + baseline_logits = torch.randn( + batch_size, max_seq_len, vocab_size, requires_grad=True, device="cuda" + ) + + loss_config = { + "reference_policy_kl_penalty": 0.1, + "ratio_clip_min": 0.2, + "ratio_clip_max": 0.2, + "ratio_clip_c": 3.0, + "use_on_policy_kl_approximation": False, + "use_importance_sampling_correction": False, + "token_level_loss": True, + } + + base_loss_fn = ClippedPGLossFn(loss_config) + data_dict = BatchedDataDict(original_data) + + global_valid_toks = torch.tensor( + sum(seq_lengths).item(), dtype=torch.float, device="cuda" + ) + global_valid_seqs = 
torch.tensor(batch_size, dtype=torch.float, device="cuda") + + # Forward pass + baseline_loss, baseline_metrics = base_loss_fn( + baseline_logits, + data_dict, + global_valid_seqs, + global_valid_toks, + ) + + # Backward pass + baseline_loss.backward() + + # Check baseline gradients + baseline_grad_norm = torch.norm(baseline_logits.grad).item() + baseline_grad_max = torch.max(torch.abs(baseline_logits.grad)).item() + baseline_grad_mean = torch.mean(torch.abs(baseline_logits.grad)).item() + baseline_grad_store = baseline_logits.grad.clone() + baseline_logits.grad.zero_() + + print( + f"Rank {rank}: Baseline gradient stats - norm: {baseline_grad_norm:.4f}, max: {baseline_grad_max:.4f}, mean: {baseline_grad_mean:.4f}" + ) + + # ===== TEST 2: Sequence packing with context parallelism ===== + print(f"Rank {rank}: Testing with sequence packing + CP") + + # Pack sequences + pad_to_multiple = cp_size * 2 # Common requirement for CP + ( + packed_input_ids, + packed_input_ids_cp, + packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) = _pack_sequences_for_megatron( + input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of=pad_to_multiple, + pad_packed_seq_to=max_seq_len * batch_size if cp_size > 1 else None, + cp_rank=rank, + cp_size=cp_size, + ) + + # For CP, logits are sharded across context parallel ranks + def make_packed_logits(logits): + packed_logits = torch.zeros( + 1, packed_input_ids_cp.shape[1], vocab_size, device="cuda" + ) + run_seq = 0 + for i, seq_len in enumerate(seq_lengths): + padded_seqlen = cu_seqlens_padded[i + 1] - cu_seqlens_padded[i] + if padded_seqlen > baseline_logits.shape[1]: + # pad the logits with zeros + tmp_logits = torch.zeros( + 1, padded_seqlen, vocab_size, device="cuda" + ) + tmp_logits[:, :seq_len] = baseline_logits[i : i + 1, :seq_len] + else: + tmp_logits = baseline_logits[i : i + 1, :padded_seqlen] + packed_logits[ + :, run_seq // cp_size : (run_seq + padded_seqlen) // cp_size, : + ] = 
_get_tokens_on_this_cp_rank(tmp_logits, rank, cp_size) + run_seq += padded_seqlen + return packed_logits + + packed_logits = make_packed_logits(baseline_logits) + + # Create sequence packing wrapper + wrapper = SequencePackingLossWrapper( + loss_fn=base_loss_fn, + cu_seqlens_q=cu_seqlens, + cu_seqlens_q_padded=cu_seqlens_padded, + ) + + # Create data dict for packed sequences + packed_data_dict = BatchedDataDict(original_data) + + tp_group = torch.distributed.new_group(ranks=[rank]) + + # Forward pass + packed_loss, packed_metrics = wrapper( + packed_logits, + packed_data_dict, + global_valid_seqs, + global_valid_toks, + vocab_parallel_rank=0, + vocab_parallel_group=tp_group, + context_parallel_group=cp_group, + ) + + # Backward pass + packed_loss /= cp_size + packed_loss.backward() + + # Check packed gradients + packed_grad = baseline_logits.grad.clone() + # all-reduce across cp ranks + torch.distributed.all_reduce(packed_grad, op=torch.distributed.ReduceOp.SUM) + + packed_grad_norm = torch.norm(packed_grad).item() + packed_grad_max = torch.max(torch.abs(packed_grad)).item() + packed_grad_mean = torch.mean(torch.abs(packed_grad)).item() + # print(f"max grad on dims {torch.max(torch.abs(packed_grad), dim=0)}, {torch.max(torch.abs(packed_grad), dim=1)}, {torch.max(torch.abs(packed_grad), dim=2)}") + + print( + f"Rank {rank}: Packed gradient stats - norm: {packed_grad_norm:.4f}, max: {packed_grad_max:.4f}, mean: {packed_grad_mean:.4f}" + ) + + # ===== ANALYSIS ===== + gradient_ratio_norm = ( + packed_grad_norm / baseline_grad_norm + if baseline_grad_norm > 0 + else float("inf") + ) + gradient_ratio_max = ( + packed_grad_max / baseline_grad_max + if baseline_grad_max > 0 + else float("inf") + ) + gradient_ratio_mean = ( + packed_grad_mean / baseline_grad_mean + if baseline_grad_mean > 0 + else float("inf") + ) + + print( + f"Rank {rank}: Gradient ratios - norm: {gradient_ratio_norm:.4f}, max: {gradient_ratio_max:.4f}, mean: {gradient_ratio_mean:.4f}" + ) + + print( + 
f"differences by token: {torch.sum(torch.abs(packed_grad - baseline_grad_store), dim=-1)}" + ) + + torch.testing.assert_close( + packed_grad, baseline_grad_store, atol=1e-5, rtol=1e-5 + ) + + # test 3: with forward_step_arbitrary_loss + # reset grad + baseline_logits.grad.zero_() + packed_logits = make_packed_logits(baseline_logits) + + # mock model forward + class MockModel: + def __init__(self): + self.logits = packed_logits + + def __call__(self, *args, **kwargs): + return self.logits + + def forward( + self, input_ids, position_ids, attention_mask, packed_seq_params=None + ): + return self.logits + + class MockMcoreState: + def __init__(self): + # context that does nothing, but supports both with straggler_timer and with straggler_timer(bdata=True) + from contextlib import nullcontext + + class DummyStragglerTimer: + def __call__(self, *args, **kwargs): + return nullcontext() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + self.straggler_timer = DummyStragglerTimer() + + output_tensor, wrapped_loss_fn = forward_step_arbitrary_loss( + MockMcoreState(), + global_valid_seqs, + global_valid_toks, + data_iterator=iter([packed_data_dict]), + model=MockModel(), + loss_fn=base_loss_fn, + pack_sequences=True, + seq_length_key="input_lengths", + pad_individual_seqs_to_multiple_of=pad_to_multiple, + pad_full_seq_to=max_seq_len * batch_size if cp_size > 1 else None, + cp_normalize=True, + ) + loss, metrics = wrapped_loss_fn(output_tensor) + + loss.backward() + + # Check packed gradients + packed_grad = baseline_logits.grad.clone() + # all-reduce across cp ranks + torch.distributed.all_reduce(packed_grad, op=torch.distributed.ReduceOp.SUM) + + packed_grad_norm = torch.norm(packed_grad).item() + packed_grad_max = torch.max(torch.abs(packed_grad)).item() + packed_grad_mean = torch.mean(torch.abs(packed_grad)).item() + print( + f"Rank {rank}: Packed gradient stats - norm: {packed_grad_norm:.4f}, max: {packed_grad_max:.4f}, 
mean: {packed_grad_mean:.4f}" + ) + + gradient_ratio_norm = ( + packed_grad_norm / baseline_grad_norm + if baseline_grad_norm > 0 + else float("inf") + ) + gradient_ratio_max = ( + packed_grad_max / baseline_grad_max + if baseline_grad_max > 0 + else float("inf") + ) + + print( + f"Rank {rank}: Gradient ratios - norm: {gradient_ratio_norm:.4f}, max: {gradient_ratio_max:.4f}" + ) + print( + f"differences by token: {torch.sum(torch.abs(packed_grad - baseline_grad_store), dim=-1)}" + ) + + +SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN = ( + f"{SequencePackingGradientTestActor.__module__}.SequencePackingGradientTestActor" +) + + +@pytest.fixture +def register_sequence_packing_gradient_test_actor(): + """Register the SequencePackingGradientTestActor for use in tests.""" + original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get( + SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN + ) + ACTOR_ENVIRONMENT_REGISTRY[SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN] = ( + PY_EXECUTABLES.MCORE + ) + + yield SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN + + # Clean up registry + if SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN in ACTOR_ENVIRONMENT_REGISTRY: + if original_registry_value is None: + del ACTOR_ENVIRONMENT_REGISTRY[SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN] + else: + ACTOR_ENVIRONMENT_REGISTRY[SEQUENCE_PACKING_GRADIENT_TEST_ACTOR_FQN] = ( + original_registry_value + ) + + +@pytest.fixture(scope="function") +def cluster_fixture(request): + """Create and teardown a virtual cluster for CP tests.""" + cp_size = request.node.callspec.params["cp_size"] + + # Skip if not enough GPUs + if not torch.cuda.is_available() or torch.cuda.device_count() < cp_size: + pytest.skip( + f"Not enough GPUs available. Need {cp_size}, got {torch.cuda.device_count()}" + ) + + # Mysteriously, Ray is not initialized in this test, so we need to initialize it here. 
+ if not ray.is_initialized(): + print("Ray not initialized, initializing now...") + from nemo_rl.distributed.virtual_cluster import init_ray + + init_ray() + print("Ray initialized successfully") + else: + print("Ray is already initialized") + + cluster_name = f"test-sequence-packing-cp{cp_size}" + print(f"Creating virtual cluster '{cluster_name}' for {cp_size} GPUs...") + + cluster = RayVirtualCluster( + name=cluster_name, bundle_ct_per_node_list=[cp_size], use_gpus=True + ) + yield cluster + print(f"Shutting down cluster '{cluster_name}'...") + cluster.shutdown() + + +@pytest.mark.parametrize("cp_size", [1, 2]) +def test_sequence_packing_gradients_with_cp( + cluster_fixture, register_sequence_packing_gradient_test_actor, cp_size +): + """Test sequence packing gradients with context parallelism.""" + cluster = cluster_fixture + actor_fqn = register_sequence_packing_gradient_test_actor + + # For CP, all ranks are in a single group + sharding = NamedSharding(layout=list(range(cp_size)), names=["cp"]) + builder = RayWorkerBuilder(actor_fqn, cp_size) + + worker_group = RayWorkerGroup( + cluster=cluster, + remote_worker_builder=builder, + workers_per_node=None, + sharding_annotations=sharding, + ) + + # Run the test on all workers + futures = worker_group.run_all_workers_single_data( + "test_sequence_packing_gradients" + ) + _ = ray.get(futures) + worker_group.shutdown(force=True) diff --git a/tests/unit/data/packing/__init__.py b/tests/unit/data/packing/__init__.py new file mode 100644 index 0000000000..913e5a1c57 --- /dev/null +++ b/tests/unit/data/packing/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for sequence packing algorithms.""" diff --git a/tests/unit/data/packing/test_algorithms.py b/tests/unit/data/packing/test_algorithms.py new file mode 100644 index 0000000000..a47951969e --- /dev/null +++ b/tests/unit/data/packing/test_algorithms.py @@ -0,0 +1,326 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for sequence packing algorithms.""" + +import random +from typing import Dict, List + +import pytest + +from nemo_rl.data.packing.algorithms import ( + PackingAlgorithm, + SequencePacker, + get_packer, +) + + +def validate_solution( + sequence_lengths: List[int], bins: List[List[int]], bin_capacity: int +) -> bool: + """Validate that a packing solution is valid. + + Args: + sequence_lengths: The original list of sequence lengths. + bins: The packing solution, where each bin is a list of indices into sequence_lengths. + bin_capacity: The maximum capacity of each bin. + + Returns: + True if the packing is valid, False otherwise. 
+ """ + # Check that all sequences are packed + all_indices = set() + for bin_indices in bins: + all_indices.update(bin_indices) + + if len(all_indices) != len(sequence_lengths): + return False + + # Check that each bin doesn't exceed capacity + for bin_indices in bins: + bin_load = sum(sequence_lengths[idx] for idx in bin_indices) + if bin_load > bin_capacity: + return False + + return True + + +class TestSequencePacker: + """Test suite for sequence packing algorithms.""" + + @pytest.fixture + def bin_capacity(self) -> int: + """Fixture for bin capacity.""" + return 100 + + @pytest.fixture + def small_sequence_lengths(self) -> List[int]: + """Fixture for a small list of sequence lengths.""" + return [10, 20, 30, 40, 50, 60, 70, 80, 90] + + @pytest.fixture + def medium_sequence_lengths(self) -> List[int]: + """Fixture for a medium-sized list of sequence lengths.""" + return [25, 35, 45, 55, 65, 75, 85, 95, 15, 25, 35, 45, 55, 65, 75, 85, 95] + + @pytest.fixture + def large_sequence_lengths(self) -> List[int]: + """Fixture for a large list of sequence lengths.""" + # Set a seed for reproducibility + random.seed(42) + return [random.randint(10, 90) for _ in range(100)] + + @pytest.fixture + def edge_cases(self) -> Dict[str, List[int]]: + """Fixture for edge cases.""" + return { + "empty": [], + "single_item": [50], + "all_same_size": [30, 30, 30, 30, 30], + "max_size": [100, 100, 100], + "mixed_sizes": [10, 50, 100, 20, 80, 30, 70, 40, 60, 90], + } + + # TODO(ahmadki): use the function to specify all test algorithms ins tead of lists below + @pytest.fixture + def algorithms(self) -> List[PackingAlgorithm]: + """Fixture for packing algorithms.""" + return [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ] + + def test_get_packer(self, bin_capacity: int, algorithms: List[PackingAlgorithm]): + """Test the get_packer factory function.""" + # Test that 
each algorithm name returns the correct packer + for algorithm in algorithms: + packer = get_packer(algorithm, bin_capacity) + assert isinstance(packer, SequencePacker) + + # Test with an invalid algorithm value + with pytest.raises(ValueError): + # Create a non-existent enum value by using an arbitrary object + invalid_algorithm = object() + get_packer(invalid_algorithm, bin_capacity) # type: ignore + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_small_sequences( + self, + bin_capacity: int, + small_sequence_lengths: List[int], + algorithm: PackingAlgorithm, + ): + """Test packing small sequences with all algorithms.""" + packer = get_packer(algorithm, bin_capacity) + bins = packer.pack(small_sequence_lengths) + + # Validate the packing + assert validate_solution(small_sequence_lengths, bins, bin_capacity) + + # Print the number of bins used (for information) + print(f"{algorithm.name} used {len(bins)} bins for small sequences") + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_medium_sequences( + self, + bin_capacity: int, + medium_sequence_lengths: List[int], + algorithm: PackingAlgorithm, + ): + """Test packing medium-sized sequences with all algorithms.""" + packer = get_packer(algorithm, bin_capacity) + bins = packer.pack(medium_sequence_lengths) + + # Validate the packing + assert validate_solution(medium_sequence_lengths, bins, bin_capacity) + + # Print the number of bins used (for information) + print(f"{algorithm.name} used {len(bins)} bins for medium sequences") + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + 
PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_large_sequences( + self, + bin_capacity: int, + large_sequence_lengths: List[int], + algorithm: PackingAlgorithm, + ): + """Test packing large sequences with all algorithms.""" + packer = get_packer(algorithm, bin_capacity) + bins = packer.pack(large_sequence_lengths) + + # Validate the packing + assert validate_solution(large_sequence_lengths, bins, bin_capacity) + + # Print the number of bins used (for information) + print(f"{algorithm.name} used {len(bins)} bins for large sequences") + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + # TODO(ahmadki): use the function to specify all test algorithms instead of lists below + @pytest.mark.parametrize( + "case_name, sequence_lengths", + [ + ("single_item", [50]), + ("all_same_size", [30, 30, 30, 30, 30]), + ("max_size", [100, 100, 100]), + ("mixed_sizes", [10, 50, 100, 20, 80, 30, 70, 40, 60, 90]), + ], + ) + def test_edge_cases( + self, + bin_capacity: int, + algorithm: PackingAlgorithm, + case_name: str, + sequence_lengths: List[int], + ): + """Test edge cases with all algorithms.""" + packer = get_packer(algorithm, bin_capacity) + bins = packer.pack(sequence_lengths) + + # Validate the packing + assert validate_solution(sequence_lengths, bins, bin_capacity) + + # For single item, check that only one bin is created + if case_name == "single_item": + assert len(bins) == 1 + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_empty_list(self, bin_capacity: int, algorithm: PackingAlgorithm): + """Test empty list with algorithms that can handle it.""" + packer = 
get_packer(algorithm, bin_capacity) + bins = packer.pack([]) + + # For empty list, check that no bins are created + assert len(bins) == 0 + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.FIRST_FIT_SHUFFLE, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_error_cases(self, bin_capacity: int, algorithm: PackingAlgorithm): + """Test error cases with all algorithms.""" + # Test with a sequence length that exceeds bin capacity + sequence_lengths = [50, 150, 70] # 150 > bin_capacity (100) + + packer = get_packer(algorithm, bin_capacity) + with pytest.raises(ValueError): + packer.pack(sequence_lengths) + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.CONCATENATIVE, + PackingAlgorithm.FIRST_FIT_DECREASING, + PackingAlgorithm.MODIFIED_FIRST_FIT_DECREASING, + ], + ) + def test_deterministic( + self, + bin_capacity: int, + medium_sequence_lengths: List[int], + algorithm: PackingAlgorithm, + ): + """Test that deterministic algorithms produce the same result on multiple runs.""" + packer = get_packer(algorithm, bin_capacity) + + # Run the algorithm twice and check that the results are the same + bins1 = packer.pack(medium_sequence_lengths) + bins2 = packer.pack(medium_sequence_lengths) + + # Convert to a format that can be compared (sort each bin and then sort the bins) + sorted_bins1 = sorted([sorted(bin_indices) for bin_indices in bins1]) + sorted_bins2 = sorted([sorted(bin_indices) for bin_indices in bins2]) + + assert sorted_bins1 == sorted_bins2 + + @pytest.mark.parametrize( + "algorithm", + [ + PackingAlgorithm.FIRST_FIT_SHUFFLE, + ], + ) + def test_randomized( + self, + bin_capacity: int, + medium_sequence_lengths: List[int], + algorithm: PackingAlgorithm, + ): + """Test that randomized algorithms can produce different results on multiple runs.""" + # Note: This test might occasionally fail due to randomness + + # Set different seeds to 
import pytest
import torch

from nemo_rl.distributed.batched_data_dict import (
    BatchedDataDict,
    DynamicBatchingArgs,
    SequencePackingArgs,
)


def test_sequence_packing_basic():
    """Sequence packing with modified FFD: every microbatch respects the token
    budget and every row appears exactly once across all shards."""
    batch_size = 8
    max_seq_length = 512

    # Random sequence lengths between 50 and 400 (fixed seed for reproducibility).
    torch.manual_seed(42)
    sequence_lengths = torch.randint(50, 400, (batch_size,))

    # Right-pad each row with zeros out to max_seq_length.
    input_ids = []
    for seq_len in sequence_lengths:
        seq = torch.cat(
            [
                torch.randint(1, 1000, (seq_len,)),  # actual tokens
                torch.zeros(max_seq_length - seq_len, dtype=torch.long),  # padding
            ]
        )
        input_ids.append(seq)
    input_ids = torch.stack(input_ids)

    batch_data = BatchedDataDict(
        {
            "input_ids": input_ids,
            "sequence_lengths": sequence_lengths,
            "problem_ids": torch.arange(batch_size),
        }
    )

    sequence_packing_args = SequencePackingArgs(
        max_tokens_per_microbatch=1024,
        input_key="input_ids",
        input_lengths_key="sequence_lengths",
        algorithm="modified_first_fit_decreasing",
        sequence_length_pad_multiple=1,
    )

    shards = 2
    sharded_batches, sorted_indices = batch_data.shard_by_batch_size(
        shards=shards, sequence_packing_args=sequence_packing_args
    )

    # Verify output structure.
    assert len(sharded_batches) == shards
    assert len(sorted_indices) == batch_size

    # Each shard exposes microbatch metadata, and every packed microbatch
    # respects the token limit.
    for shard in sharded_batches:
        assert hasattr(shard, "micro_batch_indices")
        assert hasattr(shard, "micro_batch_lengths")
        assert len(shard.micro_batch_indices) > 0
        assert len(shard.micro_batch_lengths) > 0

        for chunk_indices, chunk_lengths in zip(
            shard.micro_batch_indices, shard.micro_batch_lengths
        ):
            assert len(chunk_indices) == len(chunk_lengths)
            for _bounds, packed_len in zip(chunk_indices, chunk_lengths):
                assert packed_len <= sequence_packing_args["max_tokens_per_microbatch"]

    # Every problem id appears exactly once across all shards/microbatches.
    # FIX: the tracking set is initialized once here; previously it was
    # (re)created inside the per-shard loop above and the dedupe check below
    # silently relied on the leaked, last-reset variable.
    problem_ids_seen = set()
    for s in sharded_batches:
        for mb in s.make_microbatch_iterator_for_packable_sequences():
            mb_len = mb["sequence_lengths"].sum().item()
            assert mb_len <= sequence_packing_args["max_tokens_per_microbatch"]
            for i in range(mb["input_ids"].shape[0]):
                problem_id = mb["problem_ids"][i].item()
                assert problem_id not in problem_ids_seen, (
                    f"Problem ID {problem_id} seen twice"
                )
                problem_ids_seen.add(problem_id)
    assert len(problem_ids_seen) == batch_size


def test_sequence_packing_uniform_lengths():
    """Test sequence packing when all sequences have the same length."""
    batch_size = 12
    seq_length = 256

    batch_data = BatchedDataDict(
        {
            "input_ids": torch.ones(batch_size, seq_length, dtype=torch.long),
            "sequence_lengths": torch.full((batch_size,), seq_length),
            "problem_ids": torch.arange(batch_size),
        }
    )

    sequence_packing_args = SequencePackingArgs(
        max_tokens_per_microbatch=1024,
        input_key="input_ids",
        input_lengths_key="sequence_lengths",
        algorithm="modified_first_fit_decreasing",
        sequence_length_pad_multiple=1,
    )

    sharded_batches, sorted_indices = batch_data.shard_by_batch_size(
        shards=2, sequence_packing_args=sequence_packing_args
    )

    # 12 sequences of 256 tokens fit four-per-1024-token microbatch, so we
    # expect 3 microbatches in total, split 2/1 across the two shards.
    assert len(sharded_batches) == 2
    len_0 = len(
        list(sharded_batches[0].make_microbatch_iterator_for_packable_sequences())
    )
    len_1 = len(
        list(sharded_batches[1].make_microbatch_iterator_for_packable_sequences())
    )
    assert len_0 + len_1 == 3
    assert min(len_0, len_1) == 1

    # Each microbatch should pack as many sequences as possible.
    for shard in sharded_batches:
        for chunk_indices, chunk_lengths in zip(
            shard.micro_batch_indices, shard.micro_batch_lengths
        ):
            for (start_idx, end_idx), packed_len in zip(chunk_indices, chunk_lengths):
                assert packed_len <= 1024
                num_seqs = end_idx - start_idx
                assert num_seqs <= 4  # can fit at most 4 sequences of length 256

    problem_ids_seen = set()
    for s in sharded_batches:
        for mb in s.make_microbatch_iterator_for_packable_sequences():
            mb_len = mb["sequence_lengths"].sum().item()
            assert mb_len <= sequence_packing_args["max_tokens_per_microbatch"]
            for i in range(mb["input_ids"].shape[0]):
                problem_id = mb["problem_ids"][i].item()
                assert problem_id not in problem_ids_seen, (
                    f"Problem ID {problem_id} seen twice"
                )
                problem_ids_seen.add(problem_id)
    assert len(problem_ids_seen) == batch_size


def test_sequence_packing_long_sequences():
    """Test sequence packing with very long sequences that require individual microbatches."""
    batch_size = 4

    batch_data = BatchedDataDict(
        {
            "input_ids": torch.ones(batch_size, 2048, dtype=torch.long),
            "sequence_lengths": torch.tensor([900, 850, 1000, 950]),
            "problem_ids": torch.arange(batch_size),
        }
    )

    sequence_packing_args = SequencePackingArgs(
        max_tokens_per_microbatch=1024,
        input_key="input_ids",
        input_lengths_key="sequence_lengths",
        algorithm="modified_first_fit_decreasing",
        sequence_length_pad_multiple=1,
    )

    sharded_batches, sorted_indices = batch_data.shard_by_batch_size(
        shards=2, sequence_packing_args=sequence_packing_args
    )

    # No two of these sequences fit in one 1024-token budget, so each must be
    # alone in its microbatch.
    for shard in sharded_batches:
        for chunk_indices, chunk_lengths in zip(
            shard.micro_batch_indices, shard.micro_batch_lengths
        ):
            for (start_idx, end_idx), _max_len in zip(chunk_indices, chunk_lengths):
                num_seqs = end_idx - start_idx
                assert num_seqs == 1

    problem_ids_seen = set()
    for s in sharded_batches:
        for mb in s.make_microbatch_iterator_for_packable_sequences():
            mb_len = mb["sequence_lengths"].sum().item()
            assert mb_len <= sequence_packing_args["max_tokens_per_microbatch"]
            for i in range(mb["input_ids"].shape[0]):
                problem_id = mb["problem_ids"][i].item()
                assert problem_id not in problem_ids_seen, (
                    f"Problem ID {problem_id} seen twice"
                )
                problem_ids_seen.add(problem_id)
    assert len(problem_ids_seen) == batch_size


def test_sequence_packing_with_dynamic_batching_conflict():
    """Test that sequence packing and dynamic batching cannot be used together."""
    batch_data = BatchedDataDict(
        {
            "input_ids": torch.ones(4, 100, dtype=torch.long),
            "sequence_lengths": torch.tensor([50, 60, 70, 80]),
        }
    )

    sequence_packing_args = SequencePackingArgs(
        max_tokens_per_microbatch=1024,
        input_key="input_ids",
        input_lengths_key="sequence_lengths",
        algorithm="modified_first_fit_decreasing",
    )

    dynamic_batching_args: DynamicBatchingArgs = {
        "input_key": "input_ids",
        "input_lengths_key": "sequence_lengths",
        "sequence_length_round": 4,
        "max_tokens_per_microbatch": 1024,
    }

    with pytest.raises(
        AssertionError,
        match="dynamic_batching_args and sequence_packing_args cannot be passed together",
    ):
        batch_data.shard_by_batch_size(
            shards=2,
            sequence_packing_args=sequence_packing_args,
            dynamic_batching_args=dynamic_batching_args,
        )


@pytest.mark.parametrize("pad_to_multiple_of", [1, 32, 64, 256])
def test_sequence_packing_microbatch_boundaries(pad_to_multiple_of):
    """Test that microbatch boundaries are correctly maintained across chunks with random sequences."""
    torch.manual_seed(123)  # For reproducible tests
    batch_size = 1024
    num_global_batches = 4
    max_seq_length = 1024
    max_tokens_per_microbatch = 1200

    def _get_padded_seqlen(seqlen):
        # Round up to the next multiple of pad_to_multiple_of (elementwise for
        # tensors). FIX: the previous version stopped at the integer division,
        # returning the multiple *count* instead of the padded length, which
        # made the per-microbatch token-budget assertion below vacuous.
        return (
            (seqlen + (pad_to_multiple_of - 1))
            // pad_to_multiple_of
            * pad_to_multiple_of
        )

    # Generate random sequence lengths with good variety.
    sequence_lengths = torch.randint(50, 800, (batch_size,))

    # Create input tensors padded out to max_seq_length.
    input_ids = []
    for seq_len in sequence_lengths:
        seq = torch.cat(
            [
                torch.randint(1, 1000, (seq_len,)),  # actual tokens
                torch.zeros(max_seq_length - seq_len, dtype=torch.long),  # padding
            ]
        )
        input_ids.append(seq)
    input_ids = torch.stack(input_ids)

    batch_data = BatchedDataDict(
        {
            "input_ids": input_ids,
            "sequence_lengths": sequence_lengths,
            "problem_ids": torch.arange(batch_size),
        }
    )

    sequence_packing_args = SequencePackingArgs(
        max_tokens_per_microbatch=max_tokens_per_microbatch,
        input_key="input_ids",
        input_lengths_key="sequence_lengths",
        algorithm="modified_first_fit_decreasing",
        sequence_length_pad_multiple=pad_to_multiple_of,
    )

    # Multiple shards with an explicit batch_size so that chunks are created.
    shards = 4
    chunk_batch_size = batch_size // num_global_batches
    sharded_batches, sorted_indices = batch_data.shard_by_batch_size(
        shards=shards,
        batch_size=chunk_batch_size,
        sequence_packing_args=sequence_packing_args,
    )

    assert len(sharded_batches) == shards
    assert len(sorted_indices) == batch_size

    # Track all problem IDs to ensure completeness and no duplicates.
    problem_ids_seen = set()

    for gb_idx in range(num_global_batches):
        mb_count_for_gb = 0
        min_mb_count = 100000000  # arbitrary large number
        max_mb_count = 0
        legal_problem_ids = set(
            range(gb_idx * chunk_batch_size, (gb_idx + 1) * chunk_batch_size)
        )
        for shard_idx in range(shards):
            shard_batch = sharded_batches[shard_idx].get_batch(gb_idx)
            mb_count = 0
            for mb in shard_batch.make_microbatch_iterator_for_packable_sequences():
                mb_count += 1
                for i in range(mb["input_ids"].shape[0]):
                    problem_id = mb["problem_ids"][i].item()
                    assert problem_id in legal_problem_ids, (
                        f"Problem ID {problem_id} not in legal problem IDs"
                    )
                    assert problem_id not in problem_ids_seen, (
                        f"Problem ID {problem_id} seen twice"
                    )
                    problem_ids_seen.add(problem_id)
                assert (
                    _get_padded_seqlen(mb["sequence_lengths"]).sum().item()
                    <= max_tokens_per_microbatch
                ), (
                    f"Sequence length {_get_padded_seqlen(mb['sequence_lengths']).sum().item()} is greater than max tokens per microbatch {max_tokens_per_microbatch}"
                )

            min_mb_count = min(min_mb_count, mb_count)
            max_mb_count = max(max_mb_count, mb_count)
            mb_count_for_gb += mb_count
        # Microbatch counts must be balanced across shards within a global batch.
        assert max_mb_count - min_mb_count <= 1

        num_actual_tokens = (
            sequence_lengths[gb_idx * chunk_batch_size : (gb_idx + 1) * chunk_batch_size]
            .sum()
            .item()
        )
        packing_efficiency = num_actual_tokens / (
            mb_count_for_gb * max_tokens_per_microbatch
        )

        # Expected (min, max) efficiency per padding multiple — coarser padding
        # wastes more of each microbatch's token budget.
        pack_efficiency_standards = {
            1: (0.97, 1.0),
            32: (0.92, 0.97),
            64: (0.85, 0.92),
            256: (0.60, 0.80),
        }
        assert packing_efficiency >= pack_efficiency_standards[pad_to_multiple_of][0], (
            f"We expect packing efficiency to be above {pack_efficiency_standards[pad_to_multiple_of][0]} for these nice random inputs with padding to multiples of {pad_to_multiple_of}. Got {packing_efficiency}"
        )
        assert packing_efficiency <= pack_efficiency_standards[pad_to_multiple_of][1], (
            f"We expect packing efficiency to be below {pack_efficiency_standards[pad_to_multiple_of][1]} for these nice random inputs with padding to multiples of {pad_to_multiple_of}. Got {packing_efficiency}"
        )

    assert len(problem_ids_seen) == batch_size

    # Finally, test that we can reorder everything back to how it was before.
    reconstructed = BatchedDataDict.from_batches(sharded_batches)
    # Check that it's different from the original (packing reordered the rows).
    assert not torch.all(reconstructed["problem_ids"] == batch_data["problem_ids"])
    assert not torch.all(reconstructed["input_ids"] == batch_data["input_ids"])
    assert not torch.all(
        reconstructed["sequence_lengths"] == batch_data["sequence_lengths"]
    )

    reconstructed.reorder_data(sorted_indices)
    # Check that it's the same as the original after reordering.
    assert torch.all(reconstructed["problem_ids"] == batch_data["problem_ids"])
    assert torch.all(reconstructed["input_ids"] == batch_data["input_ids"])
    assert torch.all(
        reconstructed["sequence_lengths"] == batch_data["sequence_lengths"]
    )
# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import numpy as np  # FIX: moved here from mid-file (was imported between fixtures)
import pytest
import ray
import torch

from nemo_rl.distributed.model_utils import (
    _get_tokens_on_this_cp_rank,
    allgather_cp_sharded_tensor,
    from_parallel_logits_to_logprobs,
    from_parallel_logits_to_logprobs_packed_sequences,
)
from nemo_rl.distributed.named_sharding import NamedSharding
from nemo_rl.distributed.ray_actor_environment_registry import (
    ACTOR_ENVIRONMENT_REGISTRY,
    PY_EXECUTABLES,
)
from nemo_rl.distributed.virtual_cluster import RayVirtualCluster
from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup


@ray.remote(num_gpus=1)
class ModelUtilsTestActor:
    """One-GPU ray actor that checks packed vs. unpacked logprob computation."""

    def __init__(self, tp_size, cp_size, sharding):
        self.tp_size = tp_size
        self.cp_size = cp_size
        self.sharding = sharding
        self.env_vars = dict(os.environ)

    def test_packed_sequences_equivalence(self):
        """Test that packed and unpacked functions return the same results."""
        torch.distributed.init_process_group(backend="nccl")

        # Rank layout: RANK = cp_rank * tp_size + tp_rank.
        tp_rank = int(os.environ["RANK"]) % self.tp_size
        cp_rank = int(os.environ["RANK"]) // self.tp_size

        # NOTE: get_ranks(tp=tp_rank) fixes the tp coordinate, so the returned
        # ranks span the CP dimension (and vice versa). The apparent swap when
        # building the groups below is therefore intentional.
        tp_ranks = self.sharding.get_ranks(tp=tp_rank)
        if isinstance(tp_ranks, int):
            tp_ranks = [tp_ranks]
        else:
            tp_ranks = tp_ranks.layout.tolist()
        cp_ranks = self.sharding.get_ranks(cp=cp_rank)
        if isinstance(cp_ranks, int):
            cp_ranks = [cp_ranks]
        else:
            cp_ranks = cp_ranks.layout.tolist()

        tp_group = torch.distributed.new_group(ranks=cp_ranks)
        cp_group = torch.distributed.new_group(ranks=tp_ranks)  # intentionally swapped; see NOTE above

        # Test parameters
        batch_size = 4
        seq_len = 32
        vocab_size = 1024

        # CP load balancing shards the sequence in 2*cp_size slices, so the
        # sequence length must be divisible by 2*cp_size.
        if self.cp_size > 1 and seq_len % (2 * self.cp_size) != 0:
            seq_len = (seq_len // (2 * self.cp_size) + 1) * (2 * self.cp_size)

        # This rank's slice of the vocabulary (tensor-parallel split).
        vocab_part_size = vocab_size // self.tp_size
        vocab_start_index = tp_rank * vocab_part_size
        vocab_end_index = (tp_rank + 1) * vocab_part_size

        unpacked_seq_len = seq_len

        # Create random data (fixed seed for reproducibility).
        torch.manual_seed(42)
        unpacked_logits = torch.randn(
            batch_size, unpacked_seq_len, vocab_part_size, device="cuda"
        )
        unpacked_target_ids = (
            torch.arange(batch_size * seq_len).reshape(batch_size, seq_len).to("cuda")
        )

        # 1. Expected logprobs from the non-packed function, without CP.
        expected_logprobs = from_parallel_logits_to_logprobs(
            unpacked_logits,
            unpacked_target_ids,
            vocab_start_index,
            vocab_end_index,
            tp_group,
            cp_group=None,
        )

        # 1.5 Same computation but with CP-sharded logits; must match.
        with_cp_logprobs = from_parallel_logits_to_logprobs(
            _get_tokens_on_this_cp_rank(
                unpacked_logits, cp_rank, self.cp_size, seq_dim=1
            ),
            unpacked_target_ids,
            vocab_start_index,
            vocab_end_index,
            tp_group,
            cp_group=cp_group,
        )

        torch.testing.assert_close(
            with_cp_logprobs, expected_logprobs, rtol=1e-5, atol=1e-5
        )

        # 2. Prepare inputs for the packed function.
        # For simplicity, all sequences have the same length.
        seq_lengths = torch.full((batch_size,), seq_len, dtype=torch.int32)
        cu_seqlens = torch.nn.functional.pad(
            torch.cumsum(seq_lengths, dim=0, dtype=torch.int32), (1, 0)
        ).to("cuda")

        # Pack the logits and target_ids into a single (1, total_tokens, ...) row.
        packed_logits = _get_tokens_on_this_cp_rank(
            unpacked_logits, cp_rank, self.cp_size, seq_dim=1
        ).reshape(1, -1, vocab_part_size)
        packed_target_ids = unpacked_target_ids.reshape(1, -1)

        # 3. Actual logprobs from the packed function.
        actual_logprobs = from_parallel_logits_to_logprobs_packed_sequences(
            packed_logits,
            packed_target_ids,
            cu_seqlens,
            seq_len,  # unpacked_seqlen
            vocab_start_index,
            vocab_end_index,
            tp_group,
            cp_group=cp_group,
        )

        # 4. Compare results.
        torch.testing.assert_close(
            actual_logprobs, expected_logprobs, rtol=1e-5, atol=1e-5
        )
        return {"success": True, "error": None}


MODEL_UTILS_TEST_ACTOR_FQN = f"{ModelUtilsTestActor.__module__}.ModelUtilsTestActor"


@pytest.fixture
def register_model_utils_test_actor():
    """Register the ModelUtilsTestActor for use in tests."""
    original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get(MODEL_UTILS_TEST_ACTOR_FQN)
    ACTOR_ENVIRONMENT_REGISTRY[MODEL_UTILS_TEST_ACTOR_FQN] = PY_EXECUTABLES.SYSTEM

    yield MODEL_UTILS_TEST_ACTOR_FQN

    # Clean up registry: restore the pre-test value (or remove our entry).
    if MODEL_UTILS_TEST_ACTOR_FQN in ACTOR_ENVIRONMENT_REGISTRY:
        if original_registry_value is None:
            del ACTOR_ENVIRONMENT_REGISTRY[MODEL_UTILS_TEST_ACTOR_FQN]
        else:
            ACTOR_ENVIRONMENT_REGISTRY[MODEL_UTILS_TEST_ACTOR_FQN] = (
                original_registry_value
            )


@pytest.fixture
def virtual_cluster_2_gpus():
    """Create a virtual cluster with 2 GPU bundles.

    NOTE(review): currently unused — the tests below construct their own
    clusters; kept for API parity with sibling test modules.
    """
    cluster = RayVirtualCluster(bundle_ct_per_node_list=[2], use_gpus=True)
    yield cluster
    cluster.shutdown()


@pytest.fixture
def virtual_cluster_4_gpus():
    """Create a virtual cluster with 4 GPU bundles.

    NOTE(review): currently unused — see virtual_cluster_2_gpus.
    """
    cluster = RayVirtualCluster(bundle_ct_per_node_list=[4], use_gpus=True)
    yield cluster
    cluster.shutdown()


@pytest.mark.parametrize(
    "tp_cp_config",
    [
        (2, 1),  # TP=2, CP=1
        (1, 2),  # TP=1, CP=2
    ],
)
def test_from_parallel_logits_to_logprobs_packed_sequences(
    register_model_utils_test_actor, tp_cp_config
):
    """Test packed sequences function against unpacked version."""
    tp_size, cp_size = tp_cp_config
    world_size = tp_size * cp_size

    # Skip if not enough GPUs.
    if not torch.cuda.is_available() or torch.cuda.device_count() < world_size:
        pytest.skip(
            f"Not enough GPUs available. Need {world_size}, got {torch.cuda.device_count()}"
        )

    # FIX: size the cluster from world_size instead of a hard-coded 2 (equal
    # for the current parametrizations, but correct if more are added).
    cluster = RayVirtualCluster(bundle_ct_per_node_list=[world_size], use_gpus=True)

    try:
        actor_fqn = register_model_utils_test_actor

        sharding = NamedSharding(
            layout=np.arange(world_size).reshape(tp_size, cp_size), names=["tp", "cp"]
        )
        builder = RayWorkerBuilder(actor_fqn, tp_size, cp_size, sharding)

        worker_group = RayWorkerGroup(
            cluster=cluster,
            remote_worker_builder=builder,
            workers_per_node=None,
            sharding_annotations=sharding,
        )

        # Run the test on all workers.
        futures = worker_group.run_all_workers_single_data(
            "test_packed_sequences_equivalence"
        )
        results = ray.get(futures)

        # Check that all workers succeeded.
        for i, result in enumerate(results):
            assert result["success"], f"Worker {i} failed: {result['error']}"

        worker_group.shutdown(force=True)

    finally:
        cluster.shutdown()


@ray.remote(num_gpus=1)
class AllGatherCPTestActor:
    """One-GPU ray actor that checks allgather of CP-sharded tensors (fwd + bwd)."""

    def __init__(self, cp_size):
        self.cp_size = cp_size
        self.env_vars = dict(os.environ)

    def test_allgather_cp_tensor(self):
        """Test that allgather_cp_sharded_tensor correctly reconstructs tensors."""
        torch.distributed.init_process_group(backend="nccl")

        rank = int(os.environ["RANK"])
        world_size = int(os.environ["WORLD_SIZE"])

        # Create CP group - all ranks participate in CP.
        cp_group = torch.distributed.new_group(ranks=list(range(world_size)))

        # Test parameters
        batch_size = 2
        original_seq_len = 8
        hidden_size = 16

        # Ensure sequence length is compatible with CP load balancing.
        if original_seq_len % (2 * self.cp_size) != 0:
            original_seq_len = (original_seq_len // (2 * self.cp_size) + 1) * (
                2 * self.cp_size
            )

        # Create original tensor (same on all ranks for testing).
        torch.manual_seed(42)  # Same seed for reproducibility
        original_tensor = (
            torch.arange(
                batch_size * original_seq_len * hidden_size, dtype=torch.float32
            )
            .reshape(batch_size, original_seq_len, hidden_size)
            .to("cuda")
        )
        original_tensor.requires_grad = True

        # Shard the tensor using CP logic.
        sharded_tensor = _get_tokens_on_this_cp_rank(
            original_tensor, rank, self.cp_size, seq_dim=1
        )

        # Test 1: Gather sharded tensor and verify it matches original.
        gathered_tensor = allgather_cp_sharded_tensor(
            sharded_tensor, cp_group, seq_dim=1
        )

        if gathered_tensor.shape != original_tensor.shape:
            return {
                "success": False,
                "error": f"Shape mismatch: expected {original_tensor.shape}, got {gathered_tensor.shape}",
            }

        torch.testing.assert_close(
            gathered_tensor, original_tensor, rtol=1e-5, atol=1e-5
        )

        # Test backward: d(sum(x^2))/dx = 2x; only this rank's shard of the
        # (cp_size-normalized) grad should be populated, the rest zero.
        def loss_fn(x):
            return torch.sum(x**2)

        loss = loss_fn(gathered_tensor)
        loss.backward()
        grad = original_tensor.grad / self.cp_size
        grad_sharded = _get_tokens_on_this_cp_rank(grad, rank, self.cp_size, seq_dim=1)

        torch.testing.assert_close(
            grad_sharded,
            _get_tokens_on_this_cp_rank(
                2 * original_tensor, rank, self.cp_size, seq_dim=1
            ),
            rtol=1e-5,
            atol=1e-5,
        )
        torch.testing.assert_close(
            _get_tokens_on_this_cp_rank(
                grad, (rank + 1) % self.cp_size, self.cp_size, seq_dim=1
            ),
            torch.zeros_like(sharded_tensor),
            rtol=1e-5,
            atol=1e-5,
        )

        # Test 2: Test with the sequence dimension at dim=0.
        original_tensor_dim0 = torch.randn(
            original_seq_len, batch_size, hidden_size, device="cuda"
        )

        sharded_tensor_dim0 = _get_tokens_on_this_cp_rank(
            original_tensor_dim0, rank, self.cp_size, seq_dim=0
        )

        gathered_tensor_dim0 = allgather_cp_sharded_tensor(
            sharded_tensor_dim0, cp_group, seq_dim=0
        )

        if gathered_tensor_dim0.shape != original_tensor_dim0.shape:
            return {
                "success": False,
                "error": f"Shape mismatch for seq_dim=0: expected {original_tensor_dim0.shape}, got {gathered_tensor_dim0.shape}",
            }

        torch.testing.assert_close(
            gathered_tensor_dim0, original_tensor_dim0, rtol=1e-5, atol=1e-5
        )

        # Test 3: Test with a 2D tensor.
        original_2d = torch.randn(original_seq_len, hidden_size, device="cuda")
        sharded_2d = _get_tokens_on_this_cp_rank(
            original_2d, rank, self.cp_size, seq_dim=0
        )
        gathered_2d = allgather_cp_sharded_tensor(sharded_2d, cp_group, seq_dim=0)

        torch.testing.assert_close(gathered_2d, original_2d, rtol=1e-5, atol=1e-5)

        return {"success": True, "error": None}


ALLGATHER_CP_TEST_ACTOR_FQN = f"{AllGatherCPTestActor.__module__}.AllGatherCPTestActor"


@pytest.fixture
def register_allgather_cp_test_actor():
    """Register the AllGatherCPTestActor for use in tests."""
    original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get(
        ALLGATHER_CP_TEST_ACTOR_FQN
    )
    ACTOR_ENVIRONMENT_REGISTRY[ALLGATHER_CP_TEST_ACTOR_FQN] = PY_EXECUTABLES.SYSTEM

    yield ALLGATHER_CP_TEST_ACTOR_FQN

    # Clean up registry: restore the pre-test value (or remove our entry).
    if ALLGATHER_CP_TEST_ACTOR_FQN in ACTOR_ENVIRONMENT_REGISTRY:
        if original_registry_value is None:
            del ACTOR_ENVIRONMENT_REGISTRY[ALLGATHER_CP_TEST_ACTOR_FQN]
        else:
            ACTOR_ENVIRONMENT_REGISTRY[ALLGATHER_CP_TEST_ACTOR_FQN] = (
                original_registry_value
            )


@pytest.mark.parametrize("cp_size", [2])
def test_allgather_cp_sharded_tensor(register_allgather_cp_test_actor, cp_size):
    """Test allgather_cp_sharded_tensor function."""
    # Skip if not enough GPUs.
    if not torch.cuda.is_available() or torch.cuda.device_count() < cp_size:
        pytest.skip(
            f"Not enough GPUs available. Need {cp_size}, got {torch.cuda.device_count()}"
        )

    cluster = RayVirtualCluster(bundle_ct_per_node_list=[cp_size], use_gpus=True)

    try:
        actor_fqn = register_allgather_cp_test_actor

        # For CP, all ranks are in a single group.
        sharding = NamedSharding(layout=list(range(cp_size)), names=["cp"])
        builder = RayWorkerBuilder(actor_fqn, cp_size)

        worker_group = RayWorkerGroup(
            cluster=cluster,
            remote_worker_builder=builder,
            workers_per_node=None,
            sharding_annotations=sharding,
        )

        # Run the test on all workers.
        futures = worker_group.run_all_workers_single_data("test_allgather_cp_tensor")
        results = ray.get(futures)

        # Check that all workers succeeded.
        for i, result in enumerate(results):
            assert result["success"], f"Worker {i} failed: {result['error']}"

        worker_group.shutdown(force=True)

    finally:
        cluster.shutdown()
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/models/megatron/test_common.py b/tests/unit/models/megatron/test_common.py new file mode 100644 index 0000000000..cc1214566a --- /dev/null +++ b/tests/unit/models/megatron/test_common.py @@ -0,0 +1,707 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os + +import pytest +import ray +import torch + +from nemo_rl.distributed.named_sharding import NamedSharding +from nemo_rl.distributed.ray_actor_environment_registry import ( + ACTOR_ENVIRONMENT_REGISTRY, + PY_EXECUTABLES, +) +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup + + +@ray.remote(num_gpus=1) +class PackSequencesTestActor: + def __init__(self, cp_size): + self.cp_size = cp_size + self.env_vars = dict(os.environ) + + def run_all_pack_sequences_tests(self): + """Run all sequence packing tests in a single call to avoid expensive reinitializations.""" + from nemo_rl.distributed.model_utils import _get_tokens_on_this_cp_rank + from nemo_rl.models.megatron.common import _pack_sequences_for_megatron + + # Initialize process group if CP > 1 + if self.cp_size > 1: + torch.distributed.init_process_group(backend="nccl") + rank = int(os.environ["RANK"]) + else: + rank = 0 + + results = {} + + # Test 1: Basic packing functionality + results["basic"] = self._test_basic_packing(_pack_sequences_for_megatron) + if not results["basic"]["success"]: + return results["basic"] + + # Test 2: Variable sequence lengths + results["variable_lengths"] = self._test_variable_lengths( + _pack_sequences_for_megatron + ) + if not results["variable_lengths"]["success"]: + return results["variable_lengths"] + + # Test 3: Content preservation and consistency + results["consistency"] = self._test_consistency(_pack_sequences_for_megatron) + if not results["consistency"]["success"]: + return results["consistency"] + + # Test 4: Edge cases + results["edge_cases"] = self._test_edge_cases(_pack_sequences_for_megatron) + if not results["edge_cases"]["success"]: + return results["edge_cases"] + + # Test 5: Context parallelism (only if CP > 1) + if self.cp_size > 1: + results["context_parallel"] = self._test_context_parallel( + _pack_sequences_for_megatron, _get_tokens_on_this_cp_rank, rank + ) 
+ if not results["context_parallel"]["success"]: + return results["context_parallel"] + else: + results["context_parallel"] = { + "success": True, + "error": None, + "skipped": "CP=1", + } + + return {"success": True, "error": None, "detailed_results": results} + + def _test_basic_packing(self, _pack_sequences_for_megatron): + """Test basic sequence packing without context parallelism.""" + try: + # Test parameters + batch_size = 3 + max_seq_len = 10 + vocab_size = 100 + + # Create test data with variable sequence lengths + input_ids = torch.randint( + 0, vocab_size, (batch_size, max_seq_len), device="cuda" + ) + seq_lengths = torch.tensor([8, 5, 7], device="cuda") + + # Test 1: Basic packing without CP + packed_input_ids, _, packed_seq_params, cu_seqlens, cu_seqlens_padded = ( + _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + ) + + # Verify shapes + expected_total_tokens = seq_lengths.sum().item() + if packed_input_ids.shape != (1, expected_total_tokens): + return { + "success": False, + "error": f"Basic packing shape mismatch: expected (1, {expected_total_tokens}), got {packed_input_ids.shape}", + } + + # Verify cu_seqlens + expected_cu_seqlens = torch.tensor( + [0, 8, 13, 20], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens, expected_cu_seqlens): + return { + "success": False, + "error": f"cu_seqlens mismatch: expected {expected_cu_seqlens}, got {cu_seqlens}", + } + + # Verify PackedSeqParams + if packed_seq_params.qkv_format != "thd": + return { + "success": False, + "error": f"Wrong qkv_format: expected 'thd', got {packed_seq_params.qkv_format}", + } + + if packed_seq_params.max_seqlen_q != 8: + return { + "success": False, + "error": f"Wrong max_seqlen_q: expected 8, got {packed_seq_params.max_seqlen_q}", + } + + # Test 2: Packing with individual sequence padding + ( + packed_input_ids_pad, + _, + packed_seq_params_pad, + cu_seqlens_pad, + cu_seqlens_padded_pad, + ) = _pack_sequences_for_megatron( + 
input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of=4, + cp_rank=0, + cp_size=1, + ) + + # With padding to multiple of 4: [8, 5, 7] -> [8, 8, 8] = 24 tokens + expected_total_tokens_pad = 24 + if packed_input_ids_pad.shape != (1, expected_total_tokens_pad): + return { + "success": False, + "error": f"Padded packing shape mismatch: expected (1, {expected_total_tokens_pad}), got {packed_input_ids_pad.shape}", + } + + # Verify padded cu_seqlens + expected_cu_seqlens_padded = torch.tensor( + [0, 8, 16, 24], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens_padded_pad, expected_cu_seqlens_padded): + return { + "success": False, + "error": f"Padded cu_seqlens mismatch: expected {expected_cu_seqlens_padded}, got {cu_seqlens_padded_pad}", + } + + return {"success": True, "error": None} + + except Exception as e: + return {"success": False, "error": f"Basic packing test failed: {str(e)}"} + + def _test_variable_lengths(self, _pack_sequences_for_megatron): + """Test sequence packing with variable sequence lengths.""" + try: + # Test parameters + batch_size = 4 + max_seq_len = 12 + vocab_size = 50 + + # Create test data with highly variable sequence lengths + input_ids = torch.randint( + 0, vocab_size, (batch_size, max_seq_len), device="cuda" + ) + seq_lengths = torch.tensor([12, 3, 8, 1], device="cuda") + + # Test 1: Variable lengths without padding + packed_input_ids, _, packed_seq_params, cu_seqlens, cu_seqlens_padded = ( + _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + ) + + # Verify total tokens + expected_total_tokens = seq_lengths.sum().item() # 12 + 3 + 8 + 1 = 24 + if packed_input_ids.shape != (1, expected_total_tokens): + return { + "success": False, + "error": f"Variable lengths shape mismatch: expected (1, {expected_total_tokens}), got {packed_input_ids.shape}", + } + + # Verify cu_seqlens + expected_cu_seqlens = torch.tensor( + [0, 12, 15, 23, 24], device="cuda", dtype=torch.int32 + ) + if not 
torch.equal(cu_seqlens, expected_cu_seqlens): + return { + "success": False, + "error": f"Variable lengths cu_seqlens mismatch: expected {expected_cu_seqlens}, got {cu_seqlens}", + } + + # Test 2: Variable lengths with padding + ( + packed_input_ids_pad, + _, + packed_seq_params_pad, + cu_seqlens_pad, + cu_seqlens_padded_pad, + ) = _pack_sequences_for_megatron( + input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of=4, + cp_rank=0, + cp_size=1, + ) + + # With padding to multiple of 4: [12, 3, 8, 1] -> [12, 4, 8, 4] = 28 tokens + expected_total_tokens_pad = 28 + if packed_input_ids_pad.shape != (1, expected_total_tokens_pad): + return { + "success": False, + "error": f"Variable lengths padded shape mismatch: expected (1, {expected_total_tokens_pad}), got {packed_input_ids_pad.shape}", + } + + # Verify padded cu_seqlens + expected_cu_seqlens_padded = torch.tensor( + [0, 12, 16, 24, 28], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens_padded_pad, expected_cu_seqlens_padded): + return { + "success": False, + "error": f"Variable lengths padded cu_seqlens mismatch: expected {expected_cu_seqlens_padded}, got {cu_seqlens_padded_pad}", + } + + # Verify max_seqlen + if packed_seq_params.max_seqlen_q != 12: + return { + "success": False, + "error": f"Variable lengths wrong max_seqlen_q: expected 12, got {packed_seq_params.max_seqlen_q}", + } + + if packed_seq_params_pad.max_seqlen_q != 12: + return { + "success": False, + "error": f"Variable lengths padded wrong max_seqlen_q: expected 12, got {packed_seq_params_pad.max_seqlen_q}", + } + + return {"success": True, "error": None} + + except Exception as e: + return { + "success": False, + "error": f"Variable lengths test failed: {str(e)}", + } + + def _test_consistency(self, _pack_sequences_for_megatron): + """Test that packing produces consistent results and that content is preserved.""" + try: + # Test parameters + batch_size = 2 + seq_len = 8 + vocab_size = 20 + + # Create deterministic test 
data + torch.manual_seed(123) + input_ids = torch.randint( + 0, vocab_size, (batch_size, seq_len), device="cuda" + ) + seq_lengths = torch.tensor([6, 4], device="cuda") + + # Test consistency between multiple calls + ( + packed_input_ids_1, + _, + packed_seq_params_1, + cu_seqlens_1, + cu_seqlens_padded_1, + ) = _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + + ( + packed_input_ids_2, + _, + packed_seq_params_2, + cu_seqlens_2, + cu_seqlens_padded_2, + ) = _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + + # Verify consistency + if not torch.equal(packed_input_ids_1, packed_input_ids_2): + return { + "success": False, + "error": "Inconsistent packed_input_ids between calls", + } + + if not torch.equal(cu_seqlens_1, cu_seqlens_2): + return { + "success": False, + "error": "Inconsistent cu_seqlens between calls", + } + + # Verify content preservation + # Extract the first sequence (length 6) and compare with original + first_seq_packed = packed_input_ids_1[0, :6] + first_seq_original = input_ids[0, :6] + + if not torch.equal(first_seq_packed, first_seq_original): + return { + "success": False, + "error": "Content not preserved in first sequence", + } + + # Extract the second sequence (length 4) and compare with original + second_seq_packed = packed_input_ids_1[0, 6:10] + second_seq_original = input_ids[1, :4] + + if not torch.equal(second_seq_packed, second_seq_original): + return { + "success": False, + "error": "Content not preserved in second sequence", + } + + return {"success": True, "error": None} + + except Exception as e: + return {"success": False, "error": f"Consistency test failed: {str(e)}"} + + def _test_edge_cases(self, _pack_sequences_for_megatron): + """Test edge cases and error conditions.""" + try: + # Test 1: Single sequence + batch_size = 1 + seq_len = 10 + vocab_size = 50 + + input_ids = torch.randint( + 0, vocab_size, (batch_size, seq_len), device="cuda" + ) + seq_lengths = 
torch.tensor([seq_len], device="cuda") + + packed_input_ids, _, packed_seq_params, cu_seqlens, cu_seqlens_padded = ( + _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + ) + + # Verify single sequence packing + if packed_input_ids.shape != (1, seq_len): + return { + "success": False, + "error": f"Single sequence shape mismatch: expected (1, {seq_len}), got {packed_input_ids.shape}", + } + + expected_cu_seqlens = torch.tensor( + [0, seq_len], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens, expected_cu_seqlens): + return { + "success": False, + "error": f"Single sequence cu_seqlens mismatch: expected {expected_cu_seqlens}, got {cu_seqlens}", + } + + # Test 2: Empty sequences (length 0) + batch_size = 3 + max_seq_len = 5 + input_ids = torch.randint( + 0, vocab_size, (batch_size, max_seq_len), device="cuda" + ) + seq_lengths = torch.tensor([3, 0, 2], device="cuda") + + packed_input_ids, _, packed_seq_params, cu_seqlens, cu_seqlens_padded = ( + _pack_sequences_for_megatron( + input_ids, seq_lengths, cp_rank=0, cp_size=1 + ) + ) + + # Should handle empty sequences gracefully + expected_total_tokens = 5 # 3 + 0 + 2 + if packed_input_ids.shape != (1, expected_total_tokens): + return { + "success": False, + "error": f"Empty sequence shape mismatch: expected (1, {expected_total_tokens}), got {packed_input_ids.shape}", + } + + expected_cu_seqlens = torch.tensor( + [0, 3, 3, 5], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens, expected_cu_seqlens): + return { + "success": False, + "error": f"Empty sequence cu_seqlens mismatch: expected {expected_cu_seqlens}, got {cu_seqlens}", + } + + # Test 3: Large padding values + batch_size = 2 + seq_len = 4 + input_ids = torch.randint( + 0, vocab_size, (batch_size, seq_len), device="cuda" + ) + seq_lengths = torch.tensor([3, 2], device="cuda") + + packed_input_ids, _, packed_seq_params, cu_seqlens, cu_seqlens_padded = ( + _pack_sequences_for_megatron( + input_ids, 
+ seq_lengths, + pad_individual_seqs_to_multiple_of=8, + cp_rank=0, + cp_size=1, + ) + ) + + # With padding to multiple of 8: [3, 2] -> [8, 8] = 16 tokens + expected_total_tokens = 16 + if packed_input_ids.shape != (1, expected_total_tokens): + return { + "success": False, + "error": f"Large padding shape mismatch: expected (1, {expected_total_tokens}), got {packed_input_ids.shape}", + } + + return {"success": True, "error": None} + + except Exception as e: + return {"success": False, "error": f"Edge cases test failed: {str(e)}"} + + def _test_context_parallel( + self, _pack_sequences_for_megatron, _get_tokens_on_this_cp_rank, rank + ): + """Test sequence packing with context parallelism.""" + # Test parameters + batch_size = 2 + seq_len = 16 # Ensure divisible by cp_size * 2 + vocab_size = 100 + + # Ensure sequence length is compatible with CP + if seq_len % (2 * self.cp_size) != 0: + seq_len = (seq_len // (2 * self.cp_size) + 1) * (2 * self.cp_size) + + # Create test data + torch.manual_seed(42) # For reproducibility + input_ids = torch.arange(seq_len * batch_size, device="cuda").reshape( + batch_size, seq_len + ) + seq_lengths = torch.tensor([seq_len, seq_len], device="cuda") + + # Test 1: CP packing with individual sequence padding + ( + packed_input_ids, + packed_input_ids_cp_sharded, + packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) = _pack_sequences_for_megatron( + input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of=self.cp_size * 2, + cp_rank=rank, + cp_size=self.cp_size, + ) + + # Verify the packed tensor shape + expected_tokens_per_rank = seq_len // self.cp_size + expected_total_tokens = batch_size * expected_tokens_per_rank + if packed_input_ids_cp_sharded.shape != (1, expected_total_tokens): + return { + "success": False, + "error": f"CP packing shape mismatch: expected (1, {expected_total_tokens}), got {packed_input_ids_cp_sharded.shape}", + } + + # Verify cu_seqlens for original sequences + expected_cu_seqlens = torch.tensor( + 
[0, seq_len, seq_len * 2], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens, expected_cu_seqlens): + return { + "success": False, + "error": f"CP cu_seqlens mismatch: expected {expected_cu_seqlens}, got {cu_seqlens}", + } + + # Verify PackedSeqParams + if packed_seq_params.qkv_format != "thd": + return { + "success": False, + "error": f"CP wrong qkv_format: expected 'thd', got {packed_seq_params.qkv_format}", + } + + # Test 2: CP packing with full sequence padding + pad_full_seq_to = (batch_size * seq_len) + 8 # Add some padding + ( + packed_input_ids_full, + packed_input_ids_cp_sharded, + packed_seq_params_full, + cu_seqlens_full, + cu_seqlens_padded_full, + ) = _pack_sequences_for_megatron( + input_ids, + seq_lengths, + pad_individual_seqs_to_multiple_of=self.cp_size * 2, + pad_packed_seq_to=pad_full_seq_to, + cp_rank=rank, + cp_size=self.cp_size, + ) + + # Verify the packed tensor shape with full padding + expected_tokens_per_rank_full = pad_full_seq_to // self.cp_size + if packed_input_ids_cp_sharded.shape != (1, expected_tokens_per_rank_full): + return { + "success": False, + "error": f"CP full padding shape mismatch: expected (1, {expected_tokens_per_rank_full}), got {packed_input_ids_cp_sharded.shape}", + } + + # Verify cu_seqlens_padded for full padding + expected_cu_seqlens_padded_full = torch.tensor( + [0, seq_len, pad_full_seq_to], device="cuda", dtype=torch.int32 + ) + if not torch.equal(cu_seqlens_padded_full, expected_cu_seqlens_padded_full): + return { + "success": False, + "error": f"CP full padding cu_seqlens_padded mismatch: expected {expected_cu_seqlens_padded_full}, got {cu_seqlens_padded_full}", + } + + correct_ids_0 = torch.tensor( + [0, 1, 2, 3, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 0, 0, 0, 0, 0], + device="cuda", + ) + correct_ids_1 = torch.tensor( + [4, 5, 6, 7, 8, 9, 10, 11, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0], + device="cuda", + ) + + if ( + rank == 0 + and 
torch.sum(torch.abs(packed_input_ids_cp_sharded - correct_ids_0)).item() + != 0 + ): + return { + "success": False, + "error": f"CP full padding ids mismatch: expected {correct_ids_0}, got {packed_input_ids_cp_sharded[0, :20]}", + } + if ( + rank == 1 + and torch.sum(torch.abs(packed_input_ids_cp_sharded - correct_ids_1)).item() + != 0 + ): + return { + "success": False, + "error": f"CP full padding ids mismatch: expected {correct_ids_1}, got {packed_input_ids_cp_sharded[0, 20:]}", + } + + return {"success": True, "error": None} + + +PACK_SEQUENCES_TEST_ACTOR_FQN = ( + f"{PackSequencesTestActor.__module__}.PackSequencesTestActor" +) + + +@pytest.fixture +def register_pack_sequences_test_actor(): + """Register the PackSequencesTestActor for use in tests.""" + original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get( + PACK_SEQUENCES_TEST_ACTOR_FQN + ) + ACTOR_ENVIRONMENT_REGISTRY[PACK_SEQUENCES_TEST_ACTOR_FQN] = PY_EXECUTABLES.MCORE + + yield PACK_SEQUENCES_TEST_ACTOR_FQN + + # Clean up registry + if PACK_SEQUENCES_TEST_ACTOR_FQN in ACTOR_ENVIRONMENT_REGISTRY: + if original_registry_value is None: + del ACTOR_ENVIRONMENT_REGISTRY[PACK_SEQUENCES_TEST_ACTOR_FQN] + else: + ACTOR_ENVIRONMENT_REGISTRY[PACK_SEQUENCES_TEST_ACTOR_FQN] = ( + original_registry_value + ) + + +@pytest.fixture +def pack_sequences_setup(request): + """Setup and teardown for pack sequences tests - creates a virtual cluster and reusable actor.""" + # Get parameters from request + if hasattr(request, "param") and request.param is not None: + cp_size = request.param + else: + cp_size = 1 + + cluster = None + worker_group = None + + try: + # Skip if not enough GPUs + if not torch.cuda.is_available() or torch.cuda.device_count() < cp_size: + pytest.skip( + f"Not enough GPUs available. 
Need {cp_size}, got {torch.cuda.device_count()}" + ) + + cluster_name = f"test-pack-sequences-cp{cp_size}" + print(f"Creating virtual cluster '{cluster_name}' for {cp_size} GPUs...") + + cluster = RayVirtualCluster( + name=cluster_name, + bundle_ct_per_node_list=[cp_size], + use_gpus=True, + max_colocated_worker_groups=1, + ) + + actor_fqn = PACK_SEQUENCES_TEST_ACTOR_FQN + + # Register the actor + original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get(actor_fqn) + ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] = PY_EXECUTABLES.MCORE + + try: + # For CP tests + sharding = NamedSharding(layout=list(range(cp_size)), names=["cp"]) + builder = RayWorkerBuilder(actor_fqn, cp_size) + + worker_group = RayWorkerGroup( + cluster=cluster, + remote_worker_builder=builder, + workers_per_node=None, + sharding_annotations=sharding, + ) + + yield worker_group + + finally: + # Clean up registry + if actor_fqn in ACTOR_ENVIRONMENT_REGISTRY: + if original_registry_value is None: + del ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] + else: + ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] = original_registry_value + + finally: + print("Cleaning up pack sequences test resources...") + if worker_group: + worker_group.shutdown(force=True) + if cluster: + cluster.shutdown() + + +@pytest.mark.parametrize("pack_sequences_setup", [1], indirect=True, ids=["cp1"]) +def test_pack_sequences_comprehensive(pack_sequences_setup): + """Comprehensive test of pack sequences functionality without context parallelism.""" + worker_group = pack_sequences_setup + + # Run all tests in a single call to the actor + futures = worker_group.run_all_workers_single_data("run_all_pack_sequences_tests") + results = ray.get(futures) + + # Check that all workers succeeded + for i, result in enumerate(results): + assert result["success"], f"Worker {i} failed: {result['error']}" + + # Print detailed results for debugging + if "detailed_results" in result: + detailed = result["detailed_results"] + print(f"Worker {i} detailed results:") + for 
test_name, test_result in detailed.items(): + status = "PASSED" if test_result["success"] else "FAILED" + print(f" {test_name}: {status}") + if not test_result["success"]: + print(f" Error: {test_result['error']}") + + +@pytest.mark.parametrize("pack_sequences_setup", [2], indirect=True, ids=["cp2"]) +def test_pack_sequences_with_context_parallel(pack_sequences_setup): + """Test pack sequences functionality with context parallelism.""" + worker_group = pack_sequences_setup + + # Run all tests including CP tests + futures = worker_group.run_all_workers_single_data("run_all_pack_sequences_tests") + results = ray.get(futures) + + # Check that all workers succeeded + for i, result in enumerate(results): + assert result["success"], f"Worker {i} failed: {result['error']}" + + # Print detailed results for debugging + if "detailed_results" in result: + detailed = result["detailed_results"] + print(f"Worker {i} detailed results:") + for test_name, test_result in detailed.items(): + if "skipped" in test_result: + print(f" {test_name}: SKIPPED ({test_result['skipped']})") + else: + status = "PASSED" if test_result["success"] else "FAILED" + print(f" {test_name}: {status}") + if not test_result["success"]: + print(f" Error: {test_result['error']}") diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index fcd0977117..c176082698 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -84,6 +84,9 @@ def create_test_config( "logprob_mb_tokens": 128, "sequence_length_round": 4, }, + "sequence_packing": { + "enabled": False, + }, "optimizer": { "name": "torch.optim.AdamW", "kwargs": { diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index ea1c70f9b3..a399bca0d5 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -74,6 +74,9 @@ def 
create_megatron_test_config( "dynamic_batching": { "enabled": False, # Start with simple batching }, + "sequence_packing": { + "enabled": False, # Start with simple batching + }, "megatron_cfg": { "enabled": True, "empty_unused_memory_level": 0, @@ -1318,3 +1321,413 @@ def test_megatron_sft_training(): finally: policy.shutdown() cluster.shutdown() + + +@pytest.mark.timeout(300) +def test_megatron_context_parallel_logprob_agreement(): + """Test that CP and non-CP models produce identical logprobs with sequence packing enabled.""" + num_gpus = 2 + batch_size = 4 + seq_len = 64 + vocab_size = 32000 + + # Create test data with varying sequence lengths to test sequence packing + torch.manual_seed(42) # Fixed seed for reproducibility + input_ids = torch.arange(seq_len * batch_size, device="cuda").reshape( + batch_size, seq_len + ) + # Create varied sequence lengths for more realistic sequence packing test + input_lengths = torch.tensor([31, 21, 29, 56], dtype=torch.int32) + attention_mask = torch.zeros(batch_size, seq_len) + for i, length in enumerate(input_lengths): + attention_mask[i, :length] = 1 + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + } + ) + + # Test 1: Non-CP model (context_parallel_size=1) with sequence packing + print( + "=== Testing Non-CP model (context_parallel_size=1) with sequence packing ===" + ) + cluster_no_cp = RayVirtualCluster( + name="test-no-cp-packing", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config_no_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16") + # Ensure context parallel is disabled + config_no_cp["megatron_cfg"]["context_parallel_size"] = 1 + + # Enable sequence packing + config_no_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + + 
tokenizer = get_tokenizer(config_no_cp["tokenizer"]) + config_no_cp["generation"] = configure_generation_config( + config_no_cp["generation"], tokenizer + ) + + policy_no_cp = Policy( + cluster=cluster_no_cp, + config=config_no_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Get logprobs from non-CP model with sequence packing + policy_no_cp.prepare_for_lp_inference() + logprobs_no_cp = policy_no_cp.get_logprobs(data)["logprobs"] + logprobs_no_cp = logprobs_no_cp * attention_mask + print(f"Non-CP logprobs shape: {logprobs_no_cp.shape}") + print(f"Non-CP logprobs sample: {logprobs_no_cp[0, :5]}") + + # Cleanup non-CP resources + policy_no_cp.shutdown() + + config_no_cp_no_packing = config_no_cp.copy() + config_no_cp_no_packing["sequence_packing"] = { + "enabled": False, + } + policy_no_cp_no_packing = Policy( + cluster=cluster_no_cp, + config=config_no_cp_no_packing, + tokenizer=tokenizer, + init_reference_model=False, + ) + # Get logprobs from non-CP model with sequence packing + policy_no_cp_no_packing.prepare_for_lp_inference() + logprobs_no_cp_no_packing = policy_no_cp_no_packing.get_logprobs(data)["logprobs"] + logprobs_no_cp_no_packing = logprobs_no_cp_no_packing * attention_mask + print(f"Non-CP logprobs no packing shape: {logprobs_no_cp_no_packing.shape}") + print(f"Non-CP logprobs no packing sample: {logprobs_no_cp_no_packing[0, :5]}") + + cluster_no_cp.shutdown() + + # Verify logprobs match between CP and non-CP models with sequence packing + print("=== Comparing logprobs ===") + + # Check shapes match + print(f"diff packing {logprobs_no_cp - logprobs_no_cp_no_packing}") + assert logprobs_no_cp.shape == logprobs_no_cp_no_packing.shape, ( + f"Logprob shapes should match: {logprobs_no_cp.shape} vs {logprobs_no_cp_no_packing.shape}" + ) + ( + torch.testing.assert_close( + logprobs_no_cp, logprobs_no_cp_no_packing, rtol=1e-3, atol=1e-3 + ), + ( + "Logprobs should match between non-CP and non-CP models with sequence packing" + ), + ) + + # 
Test 2: CP model (context_parallel_size=2) with sequence packing + print("=== Testing CP model (context_parallel_size=2) with sequence packing ===") + cluster_cp = RayVirtualCluster( + name="test-cp-packing", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16") + # Enable context parallel + config_cp["megatron_cfg"]["context_parallel_size"] = 2 + + # Enable sequence packing + config_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + + config_cp["generation"] = configure_generation_config( + config_cp["generation"], tokenizer + ) + + policy_cp = Policy( + cluster=cluster_cp, + config=config_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Get logprobs from CP model with sequence packing + policy_cp.prepare_for_lp_inference() + logprobs_cp = policy_cp.get_logprobs(data)["logprobs"] + print(f"CP logprobs shape: {logprobs_cp.shape}") + print(f"CP logprobs sample: {logprobs_cp[0, :5]}") + + # Cleanup CP resources + policy_cp.shutdown() + cluster_cp.shutdown() + + # Verify logprobs match between CP and non-CP models with sequence packing + print("=== Comparing logprobs ===") + + # Check shapes match + assert logprobs_no_cp.shape == logprobs_cp.shape, ( + f"Logprob shapes should match: {logprobs_no_cp.shape} vs {logprobs_cp.shape}" + ) + + # Check that neither contains NaN or Inf + assert not torch.isnan(logprobs_no_cp).any(), ( + "Non-CP logprobs should not contain NaN" + ) + assert not torch.isinf(logprobs_no_cp).any(), ( + "Non-CP logprobs should not contain Inf" + ) + assert not torch.isnan(logprobs_cp).any(), "CP logprobs should not contain NaN" + assert not torch.isinf(logprobs_cp).any(), "CP logprobs should not contain Inf" + + # Check that first token logprobs are zero (by convention) + assert 
torch.all(logprobs_no_cp[:, 0] == 0), ( + "First token logprobs should be zero (non-CP)" + ) + assert torch.all(logprobs_cp[:, 0] == 0), "First token logprobs should be zero (CP)" + + # Compare logprobs with tight tolerance + logprobs_cp = logprobs_cp * attention_mask + print(f"diff {logprobs_no_cp_no_packing - logprobs_cp}") + max_diff = torch.max(torch.abs(logprobs_no_cp_no_packing - logprobs_cp)).item() + mean_diff = torch.mean(torch.abs(logprobs_no_cp_no_packing - logprobs_cp)).item() + print(f"Max difference: {max_diff}") + print(f"Mean difference: {mean_diff}") + + # Assert logprobs are identical (or very close due to floating point) + torch.testing.assert_close( + logprobs_no_cp_no_packing, + logprobs_cp, + rtol=1e-3, + atol=1e-2, + msg="CP and non-CP models should produce identical logprobs with sequence packing", + ) + + print( + "✓ SUCCESS: CP and non-CP models produce identical logprobs with sequence packing" + ) + + +@pytest.mark.timeout(300) +def test_megatron_context_parallel_training_agreement(): + """Test that CP and non-CP models produce consistent training results with ClippedPG loss and sequence packing.""" + num_gpus = 2 + batch_size = 2 + seq_len = 64 + vocab_size = 32000 + + # Create test data with varying sequence lengths to test sequence packing + torch.manual_seed(42) # Fixed seed for reproducibility + input_ids = torch.arange(seq_len * batch_size, device="cuda").reshape( + batch_size, seq_len + ) + + # Create varied sequence lengths for more realistic sequence packing test + input_lengths = torch.tensor([33, 48], dtype=torch.int32) + attention_mask = torch.zeros(batch_size, seq_len) + for i, length in enumerate(input_lengths): + attention_mask[i, :length] = 1 + + # Create additional data required for ClippedPG loss + token_mask = torch.zeros(batch_size, seq_len) + sample_mask = torch.ones(batch_size) + advantages = torch.randn(batch_size, seq_len) + prev_logprobs = torch.randn(batch_size, seq_len) + generation_logprobs = 
prev_logprobs.clone() + reference_policy_logprobs = prev_logprobs.clone() + labels = torch.randint(0, vocab_size, (batch_size, seq_len)) + + for i in range(batch_size): + token_mask[i, : input_lengths[i]] = 1 + + base_data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + "token_mask": token_mask, + "sample_mask": sample_mask, + "advantages": advantages, + "prev_logprobs": prev_logprobs, + "generation_logprobs": generation_logprobs, + "reference_policy_logprobs": reference_policy_logprobs, + "labels": labels, + } + ) + + # Test 1: Non-CP model (context_parallel_size=1) with sequence packing + print( + "=== Testing Non-CP model (context_parallel_size=1) with sequence packing ===" + ) + cluster_no_cp = RayVirtualCluster( + name="test-no-cp-training", + bundle_ct_per_node_list=[1], + use_gpus=True, + num_gpus_per_node=1, + max_colocated_worker_groups=1, + ) + + config_no_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16") + # Ensure context parallel is disabled + config_no_cp["megatron_cfg"]["context_parallel_size"] = 1 + config_no_cp["train_global_batch_size"] = 2 + + # Enable sequence packing + config_no_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + + tokenizer = get_tokenizer(config_no_cp["tokenizer"]) + config_no_cp["generation"] = configure_generation_config( + config_no_cp["generation"], tokenizer + ) + + policy_no_cp = Policy( + cluster=cluster_no_cp, + config=config_no_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Create ClippedPG loss function + loss_fn = ClippedPGLossFn( + { + "ratio_clip_min": 0.2, + "ratio_clip_max": 0.2, + "ratio_clip_c": None, + "reference_policy_kl_penalty": 0.1, + "disable_ppo_ratio": False, + "use_on_policy_kl_approximation": False, + "use_importance_sampling_correction": False, + "token_level_loss": True, + } + ) + 
+ # Train non-CP model + policy_no_cp.prepare_for_training() + no_cp_results = policy_no_cp.train(base_data, loss_fn) + no_cp_loss = no_cp_results["loss"] + no_cp_metrics = no_cp_results["all_mb_metrics"] + + print(f"Non-CP training loss: {no_cp_loss}") + print(f"Non-CP metrics: {no_cp_metrics}") + + # Cleanup non-CP resources + policy_no_cp.shutdown() + cluster_no_cp.shutdown() + + # Test 2: CP model (context_parallel_size=2) with sequence packing + print("=== Testing CP model (context_parallel_size=2) with sequence packing ===") + cluster_cp = RayVirtualCluster( + name="test-cp-training", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config_cp = create_megatron_test_config(tp=1, pp=1, precision="bfloat16") + # Enable context parallel + config_cp["megatron_cfg"]["context_parallel_size"] = 2 + config_cp["train_global_batch_size"] = 2 + + # Enable sequence packing + config_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + + config_cp["generation"] = configure_generation_config( + config_cp["generation"], tokenizer + ) + + policy_cp = Policy( + cluster=cluster_cp, + config=config_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Train CP model + policy_cp.prepare_for_training() + cp_results = policy_cp.train(base_data, loss_fn) + cp_loss = cp_results["loss"] + cp_metrics = cp_results["all_mb_metrics"] + + print(f"CP training loss: {cp_loss}") + print(f"CP metrics: {cp_metrics}") + + # Cleanup CP resources + policy_cp.shutdown() + cluster_cp.shutdown() + + # Compare training results + print("=== Comparing training results ===") + + # Check that neither contains NaN or Inf + assert not torch.isnan(no_cp_loss).any(), "Non-CP loss should not contain NaN" + assert not torch.isinf(no_cp_loss).any(), "Non-CP loss should not contain Inf" + assert not 
torch.isnan(cp_loss).any(), "CP loss should not contain NaN" + assert not torch.isinf(cp_loss).any(), "CP loss should not contain Inf" + + # Check shapes match + assert no_cp_loss.shape == cp_loss.shape, ( + f"Loss shapes should match: {no_cp_loss.shape} vs {cp_loss.shape}" + ) + + # Compare loss values with tolerance + loss_diff = torch.abs(no_cp_loss - cp_loss) + max_loss_diff = torch.max(loss_diff).item() + mean_loss_diff = torch.mean(loss_diff).item() + + print(f"Loss difference - Max: {max_loss_diff:.6f}, Mean: {mean_loss_diff:.6f}") + + # Check key metrics are similar + key_metrics = ["probs_ratio", "grad_norm", "kl_penalty", "approx_entropy"] + for metric in key_metrics: + if metric in no_cp_metrics and metric in cp_metrics: + no_cp_val = no_cp_metrics[metric] + cp_val = cp_metrics[metric] + if metric == "grad_norm": + diff = abs(sum(no_cp_val) - sum(cp_val) * 2) + else: + diff = abs(sum(no_cp_val) - sum(cp_val)) + print( + f"Metric {metric}: Non-CP={sum(no_cp_val):.6f}, CP={sum(cp_val):.6f}, Diff={diff:.6f}" + ) + + # Allow some tolerance for floating point differences + assert diff < 0.01 * sum(no_cp_val) or diff < 1e-4, ( + f"Metric {metric} differs too much: {diff:.6f}" + ) + + # Assert losses are very close (accounting for minor floating point differences) + torch.testing.assert_close( + no_cp_loss, + cp_loss, + rtol=1e-2, + atol=1e-2, + msg="CP and non-CP models should produce very similar training losses with sequence packing", + ) + + print( + "✓ SUCCESS: CP and non-CP models produce consistent training results with ClippedPG loss and sequence packing" + ) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 31f4b16321..11515ec661 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -30,6 +30,7 @@ def __call__( global_valid_toks: torch.Tensor | None, vocab_parallel_rank: Optional[int] = None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: 
Optional[torch.distributed.ProcessGroup] = None, ) -> tuple[torch.Tensor, dict[str, Any]]: # Just return mean of logprobs as the loss for testing loss = next_token_logits.mean() @@ -53,6 +54,7 @@ def __call__( global_valid_toks: torch.Tensor | None, vocab_parallel_rank: Optional[int] = None, vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, ) -> tuple[torch.Tensor, dict[str, Any]]: # logits shape: [batch_size, seq_len, vocab_size] # Get the next token logits for each position diff --git a/tests/unit/utils/test_native_checkpoint.py b/tests/unit/utils/test_native_checkpoint.py index feca16365d..eb7c7a19f0 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -65,6 +65,9 @@ "dynamic_batching": { "enabled": False, }, + "sequence_packing": { + "enabled": False, + }, "max_grad_norm": 1.0, "generation": { "backend": "vllm", From 71ed6e767dd5c4baccf78f29c34237213246f2d9 Mon Sep 17 00:00:00 2001 From: Rayen <130129397+RayenTian@users.noreply.github.com> Date: Wed, 23 Jul 2025 00:31:18 +0800 Subject: [PATCH 55/59] fix: SyntaxWarning: invalid escape sequence '\s' (#705) Signed-off-by: ruit Signed-off-by: Jialei Chen --- nemo_rl/environments/math_environment.py | 2 +- nemo_rl/evals/answer_parsing.py | 84 ++++++++++++------------ 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/nemo_rl/environments/math_environment.py b/nemo_rl/environments/math_environment.py index 068eb9f35c..6d4553640b 100644 --- a/nemo_rl/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -152,7 +152,7 @@ def verify( ground_truth = answer_parsing.normalize_response(ground_truth) response = answer_parsing.normalize_response(response) extracted_answer = None - match = re.search("(?i)Answer\s*:[ \t]*([A-Z])", response) + match = re.search(r"(?i)Answer\s*:[ \t]*([A-Z])", response) if match: extracted_answer = 
answer_parsing.normalize_extracted_answer( match.group(1) diff --git a/nemo_rl/evals/answer_parsing.py b/nemo_rl/evals/answer_parsing.py index dcf020774a..2719decce6 100644 --- a/nemo_rl/evals/answer_parsing.py +++ b/nemo_rl/evals/answer_parsing.py @@ -19,48 +19,48 @@ ) # All the different ways "Answer" is written in different languages MULTILINGUAL_ANSWER_REGEXES = [ - "Answer\s*:", - "Answer\s*:​​​​​​", # Korean invisible character - "উত্তর\s*:", - "उत्तर\s*:", - "উত্তরঃ", - "উত্তর\s*:", - "Antwort\s*:", - "답변\s*:", - "정답\s*:", - "답\s*:", - "答案\s*:", - "答案\s*:", - "答\s*:", - "答\s*:", - "答复\s*:", - "答曰\s*:", - "الإجابة:", - "الجواب:", - "إجابة:", - "الإجابة النهائية:", - "الإجابة الصحيحة:", - "الإجابة الصحيحة هي:", - "الإجابة هي:", - "الجواب النهائي:", - "Respuesta\s*:", - "Risposta\s*:", - "答え\s*:", - "答え\s*:", - "回答\s*:", - "回答\s*:", - "解答\s*:", - "Jawaban\s*:", - "Réponse\s*:", - "Resposta\s*:", - "Jibu\s*:", - "Idahun\s*:", - "Ìdáhùn\s*:", - "Idáhùn\s*:", - "Àmọ̀nà\s*:", - "Àdáhùn\s*:", - "Ànúgọ\s*:", - "Àṣàyàn\s*:", + r"Answer\s*:", + r"Answer\s*:​​​​​​", # Korean invisible character + r"উত্তর\s*:", + r"उत्तर\s*:", + r"উত্তরঃ", + r"উত্তর\s*:", + r"Antwort\s*:", + r"답변\s*:", + r"정답\s*:", + r"답\s*:", + r"答案\s*:", + r"答案\s*:", + r"答\s*:", + r"答\s*:", + r"答复\s*:", + r"答曰\s*:", + r"الإجابة:", + r"الجواب:", + r"إجابة:", + r"الإجابة النهائية:", + r"الإجابة الصحيحة:", + r"الإجابة الصحيحة هي:", + r"الإجابة هي:", + r"الجواب النهائي:", + r"Respuesta\s*:", + r"Risposta\s*:", + r"答え\s*:", + r"答え\s*:", + r"回答\s*:", + r"回答\s*:", + r"解答\s*:", + r"Jawaban\s*:", + r"Réponse\s*:", + r"Resposta\s*:", + r"Jibu\s*:", + r"Idahun\s*:", + r"Ìdáhùn\s*:", + r"Idáhùn\s*:", + r"Àmọ̀nà\s*:", + r"Àdáhùn\s*:", + r"Ànúgọ\s*:", + r"Àṣàyàn\s*:", ] From 6c1898a8e3d2d49eb096c61253b2637572d08621 Mon Sep 17 00:00:00 2001 From: Jialei Chen Date: Wed, 23 Jul 2025 03:51:56 +0000 Subject: [PATCH 56/59] cleanup and add docstring Signed-off-by: Jialei Chen --- examples/configs/grpo_adk_llama8b.yaml | 4 
+- examples/run_grpo_unique_numbers_w_adk.py | 36 ++++++- .../environments/simulated_user/adk_utils.py | 97 +++++++++++++------ 3 files changed, 105 insertions(+), 32 deletions(-) diff --git a/examples/configs/grpo_adk_llama8b.yaml b/examples/configs/grpo_adk_llama8b.yaml index 6274b18c07..2c116fdfcf 100644 --- a/examples/configs/grpo_adk_llama8b.yaml +++ b/examples/configs/grpo_adk_llama8b.yaml @@ -35,8 +35,10 @@ logger: policy: train_global_batch_size: 512 + dynamic_batching: + enabled: False tokenizer: chat_template: "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n{{ message['content'] }}<|eot_id|>{% endfor %}<|start_header_id|>assistant<|end_header_id|>\n" cluster: - gpus_per_node: 8 \ No newline at end of file + gpus_per_node: 8 diff --git a/examples/run_grpo_unique_numbers_w_adk.py b/examples/run_grpo_unique_numbers_w_adk.py index 74d48736de..c2401d35d4 100644 --- a/examples/run_grpo_unique_numbers_w_adk.py +++ b/examples/run_grpo_unique_numbers_w_adk.py @@ -1,3 +1,37 @@ +"""Run GRPO with the Unique Numbers Simulator using ADK. + +This script sets up and executes the Group Relative Policy Optimization (GRPO) algorithm +in a multi-turn conversational environment powered by the ADK framework. + +### Task Overview +The objective is to train an agent to guess the number of unique integers in a list generated by a simulated user. +The interaction is structured as a turn-based dialogue: +- The user generates a list of integers. +- The agent queries specific positions in the list (by index). +- The user replies with the value at that index (if available). +- The agent continues the interaction until it makes a final guess at the number of unique integers. + +### Environment Details +The environment is a simulated user that: +- Randomly generates a list of integers at setup. +- Responds to the agent's queries using an LLM via the ADK endpoint. 
+- Optionally evaluates the agent's final guess using an LLM-based grader (included for extensibility, though not essential for this task). + +### Example Usage + uv run python examples/run_grpo_unique_numbers_w_adk.py + +### Requirements +- A working ADK environment with access to a compatible LLM endpoint. + For the default Gemini endpoint, the following environment variables must be set: + - `GOOGLE_GENAI_USE_VERTEXAI=1` + - `GOOGLE_CLOUD_PROJECT="your-project-id"` + - `GOOGLE_CLOUD_LOCATION="your-location"` + +- A properly configured GRPO YAML file. + By default, the script uses: + `examples/configs/grpo_adk_llama8b.yaml` +""" + import argparse import itertools import os @@ -137,7 +171,7 @@ def main(): args, overrides = parse_args() if not args.config: args.config = os.path.join( - os.path.dirname(__file__), "configs", "grpo_unique_numbers_gemma1b.yaml" + os.path.dirname(__file__), "configs", "grpo_adk_llama8b.yaml" ) config = load_config(args.config) if overrides: diff --git a/nemo_rl/environments/simulated_user/adk_utils.py b/nemo_rl/environments/simulated_user/adk_utils.py index 231fb816b6..6bc79114e2 100644 --- a/nemo_rl/environments/simulated_user/adk_utils.py +++ b/nemo_rl/environments/simulated_user/adk_utils.py @@ -8,22 +8,26 @@ from google.genai import types from google.genai.errors import ServerError - # Initialize logging logging.basicConfig( - format='[%(asctime)s] [%(levelname)s] %(message)s', + format="[%(asctime)s] [%(levelname)s] %(message)s", level=logging.WARNING, ) logger = logging.getLogger(__name__) # Define the agents -def create_agent(instruction: str | None = None, name: str = "simulated_user", model: str = 'gemini-2.0-flash') -> Agent: +def create_agent( + instruction: str | None = None, + name: str = "simulated_user", + model: str = "gemini-2.0-flash", +) -> Agent: return Agent( model=model, name=name, description="Agent", - instruction=instruction or "You are a helpful assistant that help people answer questions.", + 
instruction=instruction + or "You are a helpful assistant that help people answer questions.", generate_content_config=types.GenerateContentConfig( safety_settings=[ types.SafetySetting( @@ -34,6 +38,7 @@ def create_agent(instruction: str | None = None, name: str = "simulated_user", m ), ) + def get_session_from_runner(runner: Runner, user_id: str): app_session_map = runner.session_service.sessions assert len(app_session_map) == 1, "Expected exactly one app in session_service" @@ -42,13 +47,15 @@ def get_session_from_runner(runner: Runner, user_id: str): assert len(sessions) == 1, "Expected exactly one user in app session" return next(iter(sessions.values())) + def get_agent_instruction_from_runner(runner: Runner): return runner.agent.instruction + def extract_conversation_history(runner: Runner, user_id: str, silence: bool = True): session = get_session_from_runner(runner, user_id) instruction = get_agent_instruction_from_runner(runner) - convo = [{"role": "instruction", "content":instruction}] + convo = [{"role": "instruction", "content": instruction}] for event in session.events: if event.content.parts and event.content.parts[0].text: convo.append({"role": event.author, "content": event.content.parts[0].text}) @@ -57,13 +64,18 @@ def extract_conversation_history(runner: Runner, user_id: str, silence: bool = T return session.id, convo -async def run_prompt_async(runner: Runner, user_id: str, new_message: str, silence: bool = True, - max_retries: int = 3, initial_delay: float = 2) -> str: - +async def run_prompt_async( + runner: Runner, + user_id: str, + new_message: str, + silence: bool = True, + max_retries: int = 3, + initial_delay: float = 2, +) -> str: new_message = new_message.strip() - content = types.Content(role='user', parts=[types.Part.from_text(text=new_message)]) + content = types.Content(role="user", parts=[types.Part.from_text(text=new_message)]) if not silence: - logger.info(f'** [User]->|||{new_message}|||') + logger.info(f"** 
[User]->|||{new_message}|||") session = get_session_from_runner(runner, user_id) @@ -78,35 +90,41 @@ async def run_prompt_async(runner: Runner, user_id: str, new_message: str, silen ): if event.content.parts and event.content.parts[0].text: if not silence: - logger.info(f'** [{event.author}]->|||{event.content.parts[0].text.strip()}|||') + logger.info( + f"** [{event.author}]->|||{event.content.parts[0].text.strip()}|||" + ) return event.content.parts[0].text.strip() else: return "" except ServerError as e: retries += 1 delay_with_jitter = delay + (random.random() * 2 - 1) * (delay * 0.5) - logger.error(f"Gemini API call (with message {new_message}) failed with ServerError {e} (attempt {retries}/{max_retries}). Retrying in {delay_with_jitter} seconds...") + logger.error( + f"Gemini API call (with message {new_message}) failed with ServerError {e} (attempt {retries}/{max_retries}). Retrying in {delay_with_jitter} seconds..." + ) await asyncio.sleep(delay_with_jitter) delay *= 2 # Exponential backoff except Exception as e: - logger.error(f"Gemini API call (with message {new_message}) failed with an unexpected error: {e}.") + logger.error( + f"Gemini API call (with message {new_message}) failed with an unexpected error: {e}." + ) return f"" - logger.error(f"Gemini API call (with message {new_message}) reached maximum retries ({max_retries}) without success.") + logger.error( + f"Gemini API call (with message {new_message}) reached maximum retries ({max_retries}) without success." 
+ ) return f"" + async def setup_runner_async(agent: Agent, app_name: str, user_id: str): runner = Runner( - agent=agent, - app_name=app_name, - session_service=InMemorySessionService() + agent=agent, app_name=app_name, session_service=InMemorySessionService() ) await runner.session_service.create_session(app_name=app_name, user_id=user_id) return runner async def main(): - sample_id_1 = "sample_1" sample_id_2 = "sample_2" @@ -115,35 +133,54 @@ async def main(): simulated_user_runner = Runner( agent=create_agent(name="simulated_user"), app_name=simulated_user_app_name, - session_service=InMemorySessionService() + session_service=InMemorySessionService(), ) - await simulated_user_runner.session_service.create_session(app_name=simulated_user_app_name, user_id=sample_id_1) - await simulated_user_runner.session_service.create_session(app_name=simulated_user_app_name, user_id=sample_id_2) + await simulated_user_runner.session_service.create_session( + app_name=simulated_user_app_name, user_id=sample_id_1 + ) + await simulated_user_runner.session_service.create_session( + app_name=simulated_user_app_name, user_id=sample_id_2 + ) # setup grader runner grader_app_name = "grader_app" grader_instruction = "You are a helpful agent that can grade the correctness and coherent of a conversation. Please only give an integer as the score." 
- grader_runner = await setup_runner_async(agent=create_agent(name="grader", instruction=grader_instruction), app_name=grader_app_name, user_id=sample_id_1) + grader_runner = await setup_runner_async( + agent=create_agent(name="grader", instruction=grader_instruction), + app_name=grader_app_name, + user_id=sample_id_1, + ) # Simulated user interactions - await run_prompt_async(simulated_user_runner, sample_id_1, 'what is 2*3+5?', silence=False) - await run_prompt_async(simulated_user_runner, sample_id_2, 'what is 2*3-5?') - await run_prompt_async(simulated_user_runner, sample_id_1, 'Now add another 10.') - await run_prompt_async(simulated_user_runner, sample_id_2, 'Now add another 100.') + await run_prompt_async( + simulated_user_runner, sample_id_1, "what is 2*3+5?", silence=False + ) + await run_prompt_async(simulated_user_runner, sample_id_2, "what is 2*3-5?") + await run_prompt_async(simulated_user_runner, sample_id_1, "Now add another 10.") + await run_prompt_async(simulated_user_runner, sample_id_2, "Now add another 100.") # Print conversation logger.info("-" * 100) - _, convo1 = extract_conversation_history(simulated_user_runner, sample_id_1, silence=False) + _, convo1 = extract_conversation_history( + simulated_user_runner, sample_id_1, silence=False + ) logger.info("-" * 100) - _, convo2 = extract_conversation_history(simulated_user_runner, sample_id_2, silence=False) + _, convo2 = extract_conversation_history( + simulated_user_runner, sample_id_2, silence=False + ) logger.info("-" * 100) # Grade conversation - await run_prompt_async(grader_runner, sample_id_1, f'Grade the above conversation and give a score between 0-10. \n\n{convo1}', silence=False) + await run_prompt_async( + grader_runner, + sample_id_1, + f"Grade the above conversation and give a score between 0-10. 
\n\n{convo1}", + silence=False, + ) logger.info("-" * 100) logger.info("DONE!") if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) From af6fa8d39e7e449b1ad564dcd55f689aee91fca4 Mon Sep 17 00:00:00 2001 From: Jialei Chen Date: Wed, 23 Jul 2025 04:34:09 +0000 Subject: [PATCH 57/59] remove image Signed-off-by: Jialei Chen --- image.png | Bin 20259 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 image.png diff --git a/image.png b/image.png deleted file mode 100644 index 8c71e34373da9c530f3a34138c02f5b68157f18b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20259 zcmeFZg?Ie z6_cK!jlL0+in$<<`! zNkwh!jYv6|UNgNW7eFK>CFQd>H0Jpr_V#ae;2S@=se^+p4>Pl~von+P8zvij6J{1} zZf@q+tjw&ej6exSu&cF$o(rQjnBu9Cf7%f<0vp(y**cinSd%`stEX?{=)g}-{@Brf z{+`BZ(_jL z%Kwii|LyT#HP!xGll}j!`L8Gc|C-8RBYROBE1*jUf&Z?|-){f?@NY#v=Es@;ixN*{ z{&N?gvj8F=^MBS%0MYUF3v(zaA*lCa!b&dC2Pp_HN)t0(OZ)C3I55zCOwjK^N;s2~Ye@ajP`k-OlgKKI}(}m?l@>1Qm2hSz&VG7OHp7Hnaq);$V7nuwx&v(tp zU!S1heg0hUV9G*89Ua*xCMOF-Erp>#LVqs5C?R8Gs*TM}o+^e=?L&r5!sjT2?-Udi zWWS+7|80wO_y)wo%eyn2AzDrrk;$ZeX!%XF-`zWX7zPar`th=r@Cmn`wBV1dw?ZL` ztK;>52m9wflvSg{1A2H#882wX^z<&=iDB+6${NAUL`3Oa!`=rCo_@s0H9sV*B zG<-etxcu{f2KgBZ#NYiKGywh7Dzq-V(6DDlAnV`qXoeutZD9^lQbrngo=9*rvP;PDMTiu`dL2J1KJ>W;(R8lzKvldeIVJh`{ekqKY7 zhJENpgds&k1bxg1YNN>r4@95VRS*W-)PGa)IxQ<6j8yd`Uz_G(S}YInx`y0rm(!Qz zB-uO=H{A^)Y6^6vIIA?$_tD0?X=)NMUX7)hU2mukBXbVq9?UthCG=>tpxv;Rau}u%M9L3FW$u4CV|ZnYH;>7|#C`4cd10zNF9w-yxR|dh^{d>ofZ!x?db*r7@_z!_{@; z)^=_WKV9i6H2mP{$;?2kGqWFj3c#B*|IJ~GqjOe_iXk@_tKxZA^&WUYZ z7XFmi)RZE?Ln}*o6OH!i@{+5!XE4WV&RO1isrj^m8P)*dv|1Z*By) z1LE&Rzf#Gj7nU^osd$zrx(H}JvqnO)%1wYxaGZ!ey~4FHn|aTo?8;%qlo?`@LW>$n zRG5KphY9Yosu@n6nHm8zH;Scs)W6Lk64a4edidFNdQhV60BKAzz3#+SY?;?mzwm6i zW%net7GX-}2{^1hQ|ay7WA0<)DeP#o`ank28^ zbFYyiXV4*ZSs@q-ufVRgzH_VBJ3nJxoFEBHkn=XVYH-*MMn7$}@nWo)K!i33(|G3W zi2L~^#a6Y+V6F5N(cl@g<~db3e^Zp#U7v0@ou-CHaIr4VK!#{wcWsz|cfqNnZb>eq 
zW=+8u=GIb~0ksSl?e4g-zPE5_|0RRP2uji37-On`_hF{mlb8Qwd(Gh;eCPH^ zmf?Ixdr$+Vil!#@R*Sl=_A}kR&FcsyWrd{Q44=6V*fh@!eg%7yZDZ@-Ya_Td{`jEW z-P`;9q{)3EBRPwN!xAq$H@ApT5Dyji*~6nHVb$5pdz z9nt=>+7)-oQ#PAatX3T{W7S`>o3?Zz)!WQyeVmP~B|u9X z!qjAlUwn0BQmHpah0p24kY8=K*ChZih5=~8SK%Dj<`k{^tvJ{51RSH3V@z?!x9?Qp z1iw&Kp4o1oyD9XmT8O@N3*_UoT~6nit@b{j`D(g1m3`VfI%Rs*a5SQM#88v%s9J-O z6-(kNh7E@@-J73sS<6*_nTeF2SDT>YWkh={Qt!O`#^7xE50*1ruqbCTZSe+V>u?$8l;5-FG(yE=Wz+ zvwJ6vxd@SOrKMjqUEE7v>o4%IsNP4-o!e$ITgk`jfG3pgBOb=sAFy#69A7BoSCgWJ z03s-wYj^vN3@-T~Qd`!`nTg;h4m&TokLo%#pOQ(mt~o6!oM zY;`#xcsU;;k4l=Ldz_TYjk#{6TJ=q7FYsLq=zQ|#e;_|LcTBZ#dU3&cNt?p&iot(X zle^JY-gtkvvl2ucL%y3Zb)2C3o1~y!;Oy-WkmY5_rqtBJ)yzbv|E4L2a=sK(@VTUuqi!u1p9M4;re6wx9#;H%e2~*?Q zb`ir^?ld5rF5bgat92>EYnav!$dZWs5U2L>*JOc#ttw~3hAWFsDB7>eO17@d>a}0L zsLDRHgtK;z`{epEy6JqoUtjaCrf&;7kPYq{X1zHr3sZ$FkeMer$1oL@8L+D>=VNCV zCtxJ;3=4~N?zM@34_>w)nBMytS%6H$-sAQpD`co}p2TCBGh(5Q1zzkGz}{jgygWo^py{fVsilfe za7Jt3);9O8*>U^m0WqT0TLR(MC4e>VsXku|lfuKVNAj`5Yv^fV5?>#XuhEIhkfKq6 ze$X&UKgU>jLchV5fWYb|_C6*41MWzr0kGI^NIHW22mJYVQ$Yzw?jK7WJ%R2ve*muh zkIE0fcRh0hoF2}oy$%#}FZ0KHV~4KJ2s5-?*9!E_uN#$|gd3LjJj;XQ&l%i$N(Z+0 zUyp@kKb-Ax%X7omQuuUD&W{Ld3}bLLn^6+o-7@wHY)STVZ_&*R${tTv80@JG2fGs= zRWGCv9Vp%Jm*$n-T1fJqy})<7IijJXiaQr*3C4H!G*b2IX!6eZ4n291J!PXSrxn*8 zVzrmII?;K1j*ZWEG!-LYPOdJ^s8r^_l7e=1DZaB*r$=U#!oE-<;5b~{;9oG+_?G2; zetBc)RH14%w{@h)Eyl#rv=D{&3Fo(QkMz&y_j^-@)>b){w-u(_#VxT1V1}(ygRvSo zS{};i4@@+#4w_pAb5(8Daw(NA+Pd_U`(s!xL?m0x%RM@+)+BTlT9y`zR6A%(4rAr= zSMFr6zQ(@2BTkTGnb~pJm+Q+r+bbdIS`n#3T@*PMEgHJ0@u&*pnJX^cx0=y<%kJ%| zde0;@7+IRpVK=$A)v=cr=EnIYJ0oD!SoV8AZ@t%BmAT+atDC%)1!Sz0WTBbNM#Pd` z<(=7s5Ti6jsblNihu;UYruNu2@d$8a?*YvJr9JR%QUvk$^J{$b(!z6uUn>{)vLgsR zY)(BAm0Kh&=@cYQLauL`B|dH5Co@n#YpBbVhCe5vrM1aBAnXbg$Z71ep_Xb0aS}Yk zq;uIO(l7M9vDq4aDOZwp9=H~hz?mF4f9tKJt0Knx@H&mE`G@H3VLim+qYQgc&*-Fx zRiZ#`LA(tIp{T`%&d4EdNJsc*MEA6YwC98ML9B>Fx#fZj@2ffsX(9b)blm$jDON-d zrx~iI!*gLYbR25&(HW2j z*nS@g;&6H$DB0Z|(YT(NO5(klRtcKj$t$6dvLJ9I9II&X;Nf>*~J)A-o`T1u4z%$^tiyd z^R8uY!#)R3eduj3 
zzh7~_kNyyd?Uo!l)N-_h8giQy0QFY!LCQL#S+Qiw%3`sfw?|2FCz+~f!;_hx#(!UN zWJrzY3@);VgUh&RhSV#6Kgc_J_*2$2+>XY1AcyKm#7WD@I2fIp5}2}uRYnc zXtsRLM%sG>ZdfCYlXL~87BGboRxC% zY>NP+11;vMs*9_Kuc`hCV#nsn3R{OJIHR;v_rWO5BOB2_f}IyNi%OWC(9hO;o%St8jmA2r zW1xG54el%yQ|)?oA)T&h(m1Pow%$+IzEjxu`ZZ$h78oxnpD_2dlBJkeRBq zJrAzt#>mSatglocEiF;IPuE|v%O>cyGY7iFhz2Zb&=JrLE%^1uos)M>vB*=sCT30o zNgc0WIym&~eUP4kq>CJYNfZuCt+BSYVMdx;G?JO)6Pu^shUB7VP2W#m+);nVdsnRK zq&VvDkt9z1m1faVP;LH&1GcJipVhAT8Vwy&;O^4ZD9%!2S-(*eyBt&QxKxMZSMSh9 zjsousRwIPHhf6`#*G}H0Hkn%=QwdlrZ5P^&Wbg12$HF;i^q#}t zF&WR;4DauhD(ol^?r%i!8!kn2_j3$_XfjIA`uJF!(T>YF*kZ$6Ka#=se3F>UOu9Jw z9wR(yVC6cPUi_(ezd-pRXO_`IjMs~zb3)}kCvVBF%uCil9(?nFAK#3S_s!+dInj4= zWvUCKZK`|kPxpq9c%Eai$ zy*|M*M_*OBDMh6{I&Ro#l7;-Tp)oke>cvnwTOqj%k0=+voqmwN$uLklOTx`gm*^Bz z+^sy0fk5_pM#m-LHt&5$jf0)X!dOP1J1OK2B){6zV~N0Kg4Fr~YoA`R!0;?mdpX>q z!=dZbL&j0#iIkOH-I9X3RF8;<5!zWrL(`fXePLY1QX6wOKHhb z6n7CxM1(bJ3oc^P4=yq7Dhc=nqK4&N^&Ma_tG!Czv`p#yx_xV>T6!f$;g~pUn2*`l zobF;#MPu8loi zZGhC93fYS1#ka_=X(ew&{qRv-{F{kv?qxp{XYK)r+W9gk{pf0{m!jII<*tvvjeqei zb@P4lta+g!pXNHlZEyI1=DDpYY)wXK?YH5;HQnM&=@CJcWI;NP!8l9RpdbD`0fk=~ zcdl(X6qk@nvVJvX6QBK5ZrW#jc^0;{aiw-TQ^4%;+iKUeq^8Q(*+n4b<%HFp)?k+A zth9c_;u&*|8B*7}p?v=c6K*Mo%t6K0D;`>5BDY4PJq{|0*j=6b7-?SmJV%SVVg~mQ zA97NeB4^4eS8^g`<#PPl?2r)^OgP=;n+zK{fZ66|FX^W}X7+}vx6u+>^={Wy-5K&L#ZbU}Bf;ySPT#Wg zYPG%@KU+$3{zaaAi+7qgaVhv`raw8RXnSQ1ucA+i3NS88-X$|tFX&*%r*iddaWrH^pMe7t}4gf1ISgD ze?zi(aHs0TPuI2U^5}6N1HRf6CN>D>`;AMj3P1vQ`(K#`q z9fQH>u6<(?+s+iR)ry0M5^SXPW~?#XZ`F{7y-y8p6Fm{dnoLc^b2!@6n>TZLRpx@8 z6_+ab_I{NZ4?JlO?trl zxR3!i-_EaFDMn~2Kd=<-J{bxu3I!c{`LWu3W7;a$l)89N_DZi#f>N#J*Jb<)zQDAY zU0ZAli*v!P64NDb84~|S=CM=m8Ua&v`HayZCsqN;43}Ndi8N^@Z`HWmL;X%TbE-;J<7)~!Aw6=()iI! 
zM4Zp;>z-ha1f)WmF?TfP16mgI=nq@3ChUnD@zFPY(zsqp4YX*ydVVV8?UNl?W!rBZAso&fvmR3}j;@$++YibeVYgwXgL?4YD(5D&f3p<2)siw4X)%m{aMn->;N4vJ|E7c|@ zGYy2J(t1hmcxi_|AWHi%4pXKa(P;(Jh2wuVM~jo3->PqA#_h^5$`HPuTi{bl3LXec%}LZ%z|Qd6Cs~$8ivR)U8bJTap66xex(HY zo{)+$=^T?lZ(<|1q&rAC*rgNj9YMv;M$jc@HE82~ynr001FqIY--e5_$L;X_NnntT z8_Tm8PB^(7Br-7_W3r(t=K9*bIZ~Z~SE6*~aD- zvsXwW9RnE~Ies70vP|+TSxFHo47-$W>G&pz=53KsWZw@r5S#kC^qtH&Xi{2#o)avQ0F3s=ai*AOcYM7)rjv-xLgs`RbG))h5b`ZTL zyP0}h9^?j#5Ep;zM(d`1ca2cgqxKR&dCa2}s zj7)l`&i)CExdM>fXeU7#hUtR(hiB$GEuZwk0X1!5%3Jf(hISOTT19k%9z%+4+Czj%D%xE^(*ws0^Jr;mB=LjushJ1E_ovj51DxFA%P%M zChQV+DzaXW1&QOEDNZB{9$75{?Kp=qlk6A|uZ-fHgnp&oTDt-Uk}>K4R+|>3>MYUu zU_x&vN_GESzpVy?>+g~oHQ0Q0W3(zYB#mcs`o%&>-}VyM-F%xbyyG#D0=FmoIt#q6 z2#xNRC0kt82+Q|^@ng^L#b8dR@;y~6{ft3?yir7pe7!yW(oN8h zj~J%EWhMhe8ARf$RcJrnTZU=qEqw+5*yz`JzN6x(h%RVM1*z+4e{U4INSSKmEj<`! zQIXgg{^>_RcC2-n3B9b^=|1H$0j92>PSdTwuhz<Z};Hn?X%(8v|i#Xu)XQAW11HT0z03lJ+0N2exA8&_kWS z;C=frh8Q99;JxTO{n)E24w`+9&Bzv5W~e>qg%JFk4JtUTC&V6PATD{S(BK0`^_GXi(taD4q~5;Jv^_+V_15>IrpY_@oEz4`;0WZD3d&%5Sj~#{HkL zSUJ!f8LxvW*WWOg0}y9(E^8(J`_-QTXs#QD2p{Edf7&3C^NG^f*oksnSJx&;^DdMB z^iv@3^%tSJMT+rLGlb(f*oQBKaE=V9Y~D(4^F1EP9d>j&U)hVCMOB8|-IRv8AH5#I zuVEyMhywUFGVNsBicSiwZr-nKmD8zte^ez>f3~Rbo0~mZjKpt|eerJx@UNk9j3m&9 z8rN8BL<+^!tca3|{B^9jQ^eB*>15@8bEjo(H}Jr1O+1$h3(*L34%6KT$>}(5r=49< zJ+E}Vt3nmNz*Qed`ZJSssn*zFu(8}TrJB{R(FG_~6McpL+E(uR8(rsp-wh(0g;NfQ z#L8I48;=QkukWSfTWWKsh$&V{x8KT(5j|1Q8i59VOIlJ)-S$jt*P5P{Mj@nWmPe&< zHm^E)F!N}zc*r%g!)C$R$w6Lo23nGkGdT+e=LPmDyT#<_=0ImmVxlBub0FvRlr5gf zbt9IOmXk9mMdXI_+&a?W&Tv}UKI!FuvwaC1P@bgG0Oi(U)!G4k6ak*>f% zKB_@z$!NFC%;KzT*fgb|%#8*AqdOoo*u78XE=?^_MkU@|6HZ)QLP1H3fijt+dv4b) zdFiz3nDw!w|u9t{M_~rj6R`r%FDwp&z8y z*X=3eswqAl0uQsxvok&A*u z$D`(kucHX9C2KRsB|&+wC>yWIG~~PYf%6%w$n6TxL^$+RINBnpatiYzrc3Z3&aB#k zw9%cUvFVNU@Eg|AcNXsPTo!z9;IH4lhcv6OCJ0-A{OakiO9j+vpvq9D~MoFl{#$h?1gi6tey{-zFbr$ho)(Uj-RFNNT7i=|}m*|uhMO%q#|3Vwqktlde z-0&^|SM?4#yI9XpNB(taXjskYYNuS3IKxz$pm)T&yhi;el@cwPFdW7gluV}d@cyr$ 
z4{%?5AD?%SUp(_s$y1aoNl+y(^$b)U=mL9Dp3t>0Z;8KLga7SfXRDQsg3cLH)?yxt zVSO}MUd~F#z-GKySXZa6u{kRa-AGDKMz%OCu-v8oqBEpdB-tk`3hpMJxwL#MU1h@J zm_B-Hkn^A9!`A$JNJxh^PDM*>Q);L zh&;t-E&jDPbdX>6-t^l;IBzZ*DIN7=!mhhjsOWgS7gt#>4dor**hNgyUuzAE7T%aL z!uwPC%46HBR-XB5(4Ry=jtht>d3vpej_G8rlo#L4(v@urKm?Xv<)_Oi+Vvw&)@GvD zYr?y2M-XWcXMoHf&N2oPk+H+(<1_T56*7w$+={!wDUx|H%i!e6@+|{piQ3?1l_-96 z3*%N{yJg0TVPZ}2vP~W)ApEc4Gw;wvTaOQ{qfHgc;Aw#{lnbSkd7rDu`_rm}bMc}o zMaXkXf=JizKlS+?$&E}^YtgUIx*S{)Fov&6wlvzYp#P+1VfJw;?a`n9J*-~16$vIngm^o?&1FQ%kpF!v z<_H`E8ax)Dhtl7z@sTe3LHW-=!xIDaoeQAL4zZaGpF^*~3w4JZgd_rDv!iqo8V24+ z=p%wqH*6n)39Rosz*2~eiGC~Sp5mj@UB}X>CfW2ldx{|FIUqIs`5qMXh3ItGfZO7n+h^k&|Y81@@#DHIUE3tDj6t2q+Czm_g5Rja9T` zTSCWE1dv~nPljK!@qor`V_m`rMMbm4#+jJZRO6|e>S`yqqo&C1Ih%{_%WrR(Guqll zoLp_8fUE-{gqkdRZHo{%gN6|*G$`M0vX2SrM+=(5;^La7KmFYEz8exSYF~3KI8F^X zy-G?(h0$qE$6&)28Ct&UN0SiO~O_2+9IQ&@N( z9}4JpP}BGCF;sG?LM$vSY+fE@jGEQSvva&OOeK3*-@bWnH$tfTI9}j8guV;Q8XT1E z;#k20xrO^+*ikOeqv6AbBl9*E+Ea(d~2G9S|1sc%Pzj*Y#66$FnqOp)&2;5wj^ggtnrR{NEikiKTt z{;)*cN?;#g1TkXY2m?4;$!%TB^H!amMm@j95xr_mlj`vS;|keao*w<3@BaRv9j7#| z;}^TKK7|DlJL3l71tleRl2S4Ej-lyE-NtbttN%5 zAV2?vQ<2m3fY*rlR^O08B)@2bQ|wrGbh~o-VYT-|v+OYdqF7~_2Wa1rlc)O7tGZqH zY_+dQMqWyRpoCgYUph^cd*iC@TjUD^p5ZZ^*YkjZAy&MD`5s^~F`RT~Mt-@h< zH~QXU{*k%Hgcar3&=I2B+A!vgTIx%DUio7-@!<~6U*p)U z@sD#kKdtyJCZ+tWYmfKI3I^JMgRHFZZe|R>kwD*K_Mn8aT(Mi`o(xoeD0ec6b!>N? z?p=e5u3<$O*Cl@AJn1}p$}8d|O}gz{A8JdS2EUEqy8jU=Tc*7<{tAQ@@PaIzdfl?q zjFIa=AWaDY;Yf7-o8JTMo2_BRy}1WQ_RSZ8Lm32PMTZtY3EeI}i?G9mae0rgauibz zKBTh`&09>d7gx0-{BYU-z1&I)lNwxGlWULVN-0!8d{7m>r=eiiq|8@EKcsBk$*RTm z+z>_kXFCq4kuu#RoOdvco|CGfQ|U*A6W7UiO~vd&Z$n2yribAI!RhNrpVVN;*}7@? 
z%`8kwm|#T4ia}x6;^Ayy!y?Mp`l}GEy}1$y6Uwr^GxYNjUkYFrC7ZuonZ9)L9eiOD zcDBr~mr^};(hVg<_^E38V}dLl8?nmfd+=WJD6Qntb%bHG>Ka-Jcu5x5ClVdaf#hd~ z+7SG9NNpDTq095LfSe1X+=0E`T?JRt%TQ3Y28@TcscJsNG3HKCZbxHy*4}q^s^PF| zs(h`D*E;^h@YyVVHfTCJ{NzWzOY_VmQb{jZ=j`F2rx)B*-_DH!-d^FW7oO(iCv!Fm z=jC|#-3G3&8YJp-aW86qSE6&u{`_DS6sZ4G7*o#io8!#7b3 zEOM(DY}2e~Fsy2P@fgV8Q&m5kH*p-8wU#H!Nv(9OomD+EADck--TMvJ zXjXr&f4DF?x^i=DG>+}w3t_J5WN8tc3#tzXR4`{Ch4~2DFs8lS!fvAZzSD$0S3@@` zpS-T0d0}E+GvC#;@rVV(E29meb!--cYB%!1qMvmZOtW~Sd6amMxs`fYZRyBv6SIWD@zRy4p#bkG8w8$PBIAY*rn>K*3i_VpSP(UC%QMY#j^u|4?5@i zqO+P@ceqpGUR7vIZTrMR$&{|#8ljDKOsk!1QsJVWIDhW5!fx=5_Or7}A4z8{yV`N= zmzKgpw^+QSK0d_I{OG7L>?N~V&HY5rq_YhyDATJ}u`!Fg304X&-a6DFGc>yW)aG}) zB3i301ptf-2{cG3Jq!xe6!z>K1kF!|>d#&RM>1JQj6JkkNnUClJnjYpXjAY(zZAPG zHI%7FUNfz_(_Al&BZ42U#IMjzYeRw$UW^r5^qJ>CVpXLpm?b+WXSC6^f|j3uAi_#Q z0*+C)`HRy5UGDuI*zEc=pUw7Tsx~pBbA{>b=or2{4p0J28UWSo7m&O$#*+DFk0LH1F;;7rY$E9BN}|k=bPl;Ozs6Q7t>w$+|*^jwgRvEfs*8AtJS!HcioqNlDp8gI$DF zvV5O@MKDI)DCVY0+zP`p_{yV!8PU78va8G<-5P-JYM2_RR=E51>(_}AtE4D2o4GF8 zj|iqDQhUXm&91WswR8NG0UnQ3Se9Z~Z2cgNZ0-pbr zZv7!{_jA7IouwvC2zMlE5#(K?(>@mwGM^EZfYl z+0X!>mj|?qy1Q~*WH*~8fQ-%AU@-IpZ!j7@;xYHAl^1CC$mN;#p}hOAd2~{oc5aNH zERW8>%ePQW&P%n=#Pg$YAzu%j)V74}xHHg!IRGJa9JgSUbd4Tj!O9koSPL2_*hyEK z2>jU>02dSism^eF-L+<_YQb0U_vU3dqd|W>aLdo2)wXq75d(`P`$Zj(lPE-M=ZHd< z@g5VTJSd=$7C5b&KF}q15FVcE)cM<^?J;d!sFf2Y5b-*Z-NFpAsV^xblkEyIR&;Yq zR4hC{5)>lfz)}zqff-0-Z>J?f7nAItA>W$VU-|eAQd{n5J*y~fHg?qd`iar-fQ9Bh zZE~m2%QHn{RI^2uNpzqB?6YcXMGFf#c}*ZmMwMhtCi3*1PR}5Cb2orxg?w|?F2J!Q z+t>4vnyD7d=^Dbja^(?{*dA_SA3sU>1SY(@P# zZC5xlw^4x|{u=#t|Ax+AyK4zB__F^oyxiw?D`r;9BO*{L6g8wj>s`>wI^NJ{nhmI! 
zV@z^4Oe-=GN~NTv-18`xj?dNZa=R{$@6-foFS$LVk$p^Jj{#+7X1Wz?2O{tw(Q9te z{cfJwXQ-xINU{Qw`r>EKMgdb+YZLXCe|6Pd{z2FQmiIkgXd>H}#3W6>vTyw6n3`K5 ziZ&>H&1dj05CJOrWBYb=fOr{S7;e45JpKHc&D_+qvMS|C6!FjW3=9BXWOa4Mwd=FJ zy%-!x^p*3^b;Ryxc^X<;gxTL;?HLIZ{&8Cip(5aICK3Pdl4qpdIMC$y9UAzcTaP6< z?)mwjwN`@(L~s-SNpnRr3wWo7rI?#!r*yqzEzQh!P~46?r`82Zt-UW-y3A(Eu}=?b z+}FKcg*Le{gxB1*B`C4j5D!9!t+cs1jBo;M_mS*h@sr^w4t7pXslH^~U-=(ds}RZ} zRf7U8WBd4-nWv(zVU$BKzyi9pjdue?yiPS0>8LL2QqD6`&;RZc-_g+zWoZ0MCH{Kg z?Sjd5!!7O)S)E!b^y7d%a#dDo>!$a)!Oq9=Fal-tk7VG22}G*5lR+v?Yy+6OY2-uL z%IPTDQ^4ROgD+D{ZP{5S2eR)wEbyT~OHB$aPW$^8mw%3_cu@lTF4ASDbC5Y~g|T6} z-SapJhq9%D=*VgcyVYmxJqpfJ<&{2s!6QD=o(1P0EuJdV~Ez&@M_mG-6J zs%V;ZL~5LNT8wGnv;^Rd0RR>9YLUthuw!&(s5WYv#*3gjFh5W5pV+|zaFpgE3W)3R zjRt6C_&b)o`}oI8?+cvCxrnaHijruP(&rP-E&|J)kONHps23h#lTNvRl>74+hfYIW zWVnyF@g6-w&wAeJ)*T|y0YW&#pAQ%o=|{lHQx7LyX1@UYjy%l1rk&^FGp_v~Q5g+1 zV{NLxQBT0d^Zv)6V>&6~qhNp$gYqkZowZBl`{4CX94??sp_k{P7K-(8GC(&#!br)_ z9h!Bux)R^90j9;dMP$-fj~m@54ZxevKEk^Wm9RF}G@0DW<3Bd{nH25VFCuo2zvc7q z#q-oRV#trf8diisr{7kqTO5$0%wDsckNU1(D( z$b^HhVqy$;r*&m=hNLz)oNSr0`&$xREYR7jsuQ&)gFr(F4h{)Mo3%)rzWH0zQJ8>U z`i#)g*@;jJb|^i4o$hDN;Vf@yIYnDHW?Z3+HGd`mcyVd#va^+cQNDV`mCgRJrD(pS zO1btmCsnp*_0vZ_-#B(=`QFZ&5SFutn2_UQiSVbD_uV!v6Do=qiLZa24!yIpqoi(z zts$W8=?R&9n3joNR-qhJX9bWT2L;xubyOwx# zhN9+|m4&RXt(hJz-Z``*PdP00THAlzaO3p3jVi)vzsfLILU`J@u)aQsZsz8QGAV9M zj5@cVW4;T@c&&GClQms;Tn@?Rf;4e9Zeb*|X+P{24D9)P^NJwE%6@H+gNA|$#DLFB z2luHD4{@9LO^1C^ag%eyyc>K@P6n*SB4%iFeCN>j>p>LCKZ45?1{&AlP{^YZLe8nn zWBiT%8|x7MhLl3gaWqs*4bh1?s(COIh~CDu?J8vpiqUZ5+YAbGSTsueMygvd4IvOSg zl10MpcB@p{78WhG?ezcMWUTrlB9oY`&8h)NpYfQoo4AG(rP9uW3U8bYdTjK?qx}jp zxKz%3#9;ilzi{{gI{cl~6!Ar9F>t%73r_+14-q~K6-Aq9R2VZ_Eow>U=jXH8ELuv+ z$$<;J9`0c)^+eGD9v&`If0I0lK5_%Hoa6=s$jv8ToJXSj1c6>u3||};r)Op|R(|}+ z&t^9ItOe``Q<{?~`ll#V*ovPC!!X9%OT-zrK3scUQ%P*~zvNQ0I`V7eEed-oV7j0E**704q5wFZ8&K^J zwSUR5&B8K*g=@k{OhhF9@}=4M@UW=`E8xD)osDoWAFHv6Q#|hNHGdhOl(ZHT2)_iM z*J;aF0vK7%%6=c7c2bi_tBEiP*Xzj3thViut4gmsDsDlqZFx!~|} zv368JS?0KU7+~^`gx6mb^`Uu6 
zJopF?9G7zUQ3&A{j0>E~K8Jc0`lO0a6QCFzNYF+4JwZYGPXq8G13(O1(Jbz-R@Mt> z($L2?0i6w4DfRL1$ToFgM4g_x_CfOrI3zO$fS0Iz_Y>S2rUzsS@y9MC13n*rhQBN% z3B`0_{aduZ0ovd2sb)H0RV)m=&Wqi@4KsDQNg|Mc_oNuN9btY9YrlZ*_#2`EItEb! zlpOpa)c+5*cr?NAcI|clhkU5m^Y~x9CaH@8^7;`qEobNNmT#X%wTSk*U*uoDk>T&} zO-4tvZFsA)iYz%d)(yuzFYcmb1@G8s4`si(vB2mrhW)lPe;PkA`fKT7DM?AQlXV0D zAc(GY;eK$v5-_(p`e8Ajz>;O!8v~}{;Ys`zzQ%WxUr{l}BxghP8_D2uOHS|G#U%W4 zJ>vR%vaq*oT3ktxj*12rXB<++V6UWN!wfR(w;SCuA&if z*}mf_P5;Q|=#X&0^E0k~#&Xii>tAk=1&6L~K%sZhm|L;;)UL0;%B`%e<;vx13a&n3 z9B%}?E}Nc*9)7Dq;ZuEqTz_Z*sFq+Tg32ar51iA41k=#43Y^G@h|Is{$JwX<%@bm! zukY9Az%T6lP5o;t^XL4Vtw_Tcf(C;_i4;!^=OdJd0SHA`$9kuL>wiR#_rFBXPIEku zehn1V3*X1z0w^gc$RQJb-g9*luj?gRL2 zo15Spw9RJ{x$TN{-s=4Hk675aHrbtFX{2G<|3g6SR18gy1R{&a#7{*Md;CTx`z`jy zSUL*<6J~2yg?{GvoU;qP#Wx}Q(5nUCiysZ@vQ8?2dL z{wC8KGiwK>Y$01WoxQcg9cN$!vq!$RTYbsqWa$gyaNKq6NLxfigDjFqLjC~%WrQC_ z&6YEW^$+)H2*N|JbQ+6s_>UHwBCk(-e9{{T;sARFdlLcS_%Iao3R#~Rt)C0wal3+$ zp~E`KXGdw9e$CUQA`W7q>R@BX4$oE*_y-Wv=Y(>O4S9Q^9%Cw&Hl0@=>=a!=Y)EyY z?rtHa!8;q!m5zHH=9ZS3eatOte3_mq!>0;!XzdIRR@pzROFIaM65!^#)A=8Hcv%qU zwqp#?#b!NUs5izB18`HRb*$sWO;Xrpo68)A(-pVp!PLFa8!uN29S*8ghlevh?X;t+ zjwQ0|2Gg$Wv=s=F<%ZxgHL7Y{q6*&Pu2MI+Fsl6sSk%-dA;mz0K>7YMdz6=?@CaJA zKW9!0b??LkVmmDwIjrcM%r`l$r(^5x-I;Nc{7&3r#{kTfHU&iNXN^Sv%SBI@CII*~quN6t~7> z~K^)-Z z+17WX2;5(wsm{R3N4^&#MflCf`B2imJCWA+%$iD(V8LlFyd2@yzQQE!t|mchNp$IE z(c|Sq8&YX#{CA^ZaR;3G)zF<)$4wR08|pLTIQm4Ke@!z(EhVLIQ1=smC^ zHOSr^`z#5}%+E{SU2j|-Gej%1dq&9tm5j8W*uU+Y`;gy+I+eQO7*meE^ zeLk)4VHnyX^atPX>u*JJY;UZo#@$>e+F32!FQOs|tJ2S09oPUNz%KWhJI{%9uSek% zj0V#vDe#p<2h|tt#IFP!ociV;y$GEJ|NG5e28M{nFz*y|_VeSP?lYnr_M*H``uA3x z^!u|E|Aes_aK4PkI61esZ)l9$&yne9YfDR<(y&Sad;`mYXib6=1%s|s+;(z2iF8g~ zN8)n7!C`4Z_pVw6A(Xlc#vSHHN{kA>a(q_1<6=2rphRraVPHX=bwWcy ztOY{gkxA&9WFy`83aTx}~p4!a@Fz#|~gAk;hYuV$Tr=Ku@Y2+8NNL zhufr2DDnULT*OI%5U0J4K;>xcQhU%Pvf*NWKDL#&YhmPAfRT|A`>6ARIB5>!GR--x zYp-K*k`nM3qP|4@N5KAm{85mlgmm%0Q(0yQRP9u^{1Zfh0{KM(Bf9(rVti80P|&4- zb>8$g@DI`iHb0sNFv6dlRHo>EfCrFKA_Mpj*jcgu0Vm6kKMK;!A{_R(V}PsG0*35L 
z|C`PK*UGtnC4Gl+ylhQN>&#kSGFm2TnzJ;q$lKJknc^ig7^!4AFJa;&!vxbj4J$|5 zQp{9P^OBZYDl8CV#e$Mxt&pDsZd0wCA{k)&& zsnA>2t}^uTF^otDIwai**94&4k2+{UVi~OXhK;@b&FTPXp%%W^UnV&7(2Z^0fc&jZ zu$_RA`Ynj0at~@O1aP-3@jz7of&Q8npl8thhgH4BIW`bTfw;FA_Wpf5<)m@H(#}Y~ z9N-Qa}g8=Q)QGpB6FPWG`&^HkMhs&Yl~6jd{d z(bx+Ki{XIiRVB}I%JUn7r%W7K2XZG|ooiDB-gQwRk@N%xY5cE2R4{?ys+MV1?`WsL zEOMV3;CK37oS*$eR5LPi_*`Nld2sGOPfFZC<$-v>n@DnvKnOl9bfPQc7HNlKU)x=Z zNY4JLhwG8ysFud!c+3NR~PrU0iaE>{rV7XAuX0MyGHZm^C)gAgN`j@gFxB7bS3Mz zd+q7X(=`%INu!^YrXDS$ z4T5D(Dh0P-eG%p&y#Ny!Sl!~>_Q1gr3q+}YpiXgA+7ZnqZbVY-0;Jk}Ex3D#xr-fY z+~4mYk5PMQe)2`7)56kj0a8qD5Y@mzXeAy!@@Q2fVBBBdkJOP4F3BSS70BH$Ez(b z=RA)>XHR~^vS#D{BItJOKRIz)zp1=DWT#{7qp;|3%B(coWpYrYV=2G%L zl;0?Q7EBonc4d4$78Dee5@C8AyjI+Sh7Q$5!&OQN6^6=ao2<-!k(Qq9`$6MjWSW*d z%u4k6BEK>Tg)XoBX#tj^*SXK)hnZ?n~HBC)!$*bAI61-xr z#?XSe9JtHNUvAr5QBi;9mvNZ0!t$PpTN?QMTZCQhyz!pzES^}iJ=|2IqSbvuoGACD zB-`nt)MVk~tLYQL`;qgULcZlG&y}e)jp`o*TGG7V3bqf|+KCyA^y_dv?7Mi#KB z+g_*-dZ|!~F0ySItK6e6dPc=Jq!_~4W?L$(f$0o+E#P6do<*kXG=3!BV%p+df~WhZ zH_dYY+pP+JW#p%p^2&26%DY)@Rs}1MhldY~Hk!aICRq z@0-9sb3{vq?2?j%g*QLsJTb!D;)D0u5&j<5l!W&cS@6FP$KXP@pHmy0zytNW0hKHx zfE7s;%2v8CQR8v%A>k_Surs?B~GmtxlmZ_yha~- z5A6yjf5{&Lh5DJ1)QNig54Ok0Qf>-%uS#04NIaYNMkH^Oc^23=&umEhu^mCgciBU~ z(impmG2=Kxp~FKPEh?QOd*}9u-=-K@Tw^GV*J3$ceJJcz%_Z5T!{j9G-34(J9}i85 z`|u-C`Cn> Date: Tue, 22 Jul 2025 16:47:19 -0700 Subject: [PATCH 58/59] ci: add a job that checks if submodules are fast forwarded (#695) Signed-off-by: Terry Kong Signed-off-by: Jialei Chen --- .github/workflows/_submodule_check.yml | 253 +++++++++++++++++++++++++ .github/workflows/cicd-main.yml | 11 ++ 2 files changed, 264 insertions(+) create mode 100644 .github/workflows/_submodule_check.yml diff --git a/.github/workflows/_submodule_check.yml b/.github/workflows/_submodule_check.yml new file mode 100644 index 0000000000..6930432c2c --- /dev/null +++ b/.github/workflows/_submodule_check.yml @@ -0,0 +1,253 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: "Submodule Fast-Forward Check" + +on: + workflow_call: + inputs: + base_ref: + required: true + type: string + description: "Target branch to check against" + head_ref: + required: true + type: string + description: "Feature branch name" + pr_number: + required: true + type: string + description: "Pull request number" + head_sha: + required: true + type: string + description: "Head commit SHA of the feature branch" + +jobs: + check: + name: Check submodule fast-forward + runs-on: ubuntu-latest + outputs: + failed: ${{ steps.check.outputs.failed }} + changed: ${{ steps.check.outputs.changed }} + comment_body: ${{ steps.check.outputs.comment_body }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - name: Fetch target branch reference + run: | + git fetch origin ${{ inputs.base_ref }} + + - name: Check submodule fast-forward status + id: check + shell: bash -x -e {0} + run: | + echo "Checking submodules are fast-forwarded..." 
+ + # Get current submodule status + echo "Current submodule status:" + git submodule status + + failed=0 + changed=0 + success_body="" + failed_body="" + + # Process each submodule from git submodule status + while read -r line; do + # Extract commit and path from: " ()" + current_commit=$(echo "$line" | awk '{print $1}' | sed 's/^[+-]//') + submodule_path=$(echo "$line" | awk '{print $2}') + + if [[ -z "$current_commit" ]] || [[ -z "$submodule_path" ]]; then + continue + fi + + submodule_name=$(basename "$submodule_path") + echo "" + echo "Checking $submodule_name at $submodule_path" + echo "Current commit: $current_commit" + + # Get target branch commit for this submodule + target_commit=$(git ls-tree origin/${{ inputs.base_ref }} "$submodule_path" | awk '{print $3}') + + if [[ -z "$target_commit" ]]; then + echo "❌ Could not find $submodule_name in ${{ inputs.base_ref }} branch" + failed=1 + continue + fi + + echo "Target commit: $target_commit" + + # Analyze the relationship between target and current commits + cd "$submodule_path" + + # Check if this is a shallow repository and unshallow if needed + if git rev-parse --is-shallow-repository >/dev/null 2>&1 && [ "$(git rev-parse --is-shallow-repository)" = "true" ]; then + echo "📦 $submodule_name: Detected shallow clone, fetching full history..." + git fetch --unshallow >/dev/null 2>&1 || { + echo "⚠️ Warning: Failed to unshallow repository. Ancestry checks may be limited." 
+ } + fi + + # Get GitHub repository URL for comment + remote_url=$(git remote get-url origin 2>/dev/null || echo "") + if [[ "$remote_url" == *.git ]]; then + github_repo="${remote_url%.git}" + else + github_repo="$remote_url" + fi + + # Case 1: Same commit + if [[ "$current_commit" = "$target_commit" ]]; then + echo "✅ $submodule_name: PR branch matches ${{ inputs.base_ref }} branch (same commit)" + # No change, so don't add to changed count or comment + + # Case 2: Check if target commit is an ancestor of current commit (current is fast-forward) + elif git merge-base --is-ancestor "$target_commit" "$current_commit" 2>/dev/null; then + echo "✅ $submodule_name: PR branch is ahead of ${{ inputs.base_ref }} branch (fast-forward)" + echo "📊 Commits added in PR #${{ inputs.pr_number }} (${{ inputs.head_ref }} branch):" + git log --oneline --graph "$target_commit".."$current_commit" 2>/dev/null || echo " (Unable to show progression - possibly shallow clone)" + changed=1 + success_body+="$submodule_name: ✅ PR branch is ahead of ${{ inputs.base_ref }} branch (fast-forward)"$'\n' + + # Case 3: Check if current commit is an ancestor of target commit (current is behind) + elif git merge-base --is-ancestor "$current_commit" "$target_commit" 2>/dev/null; then + echo "❌ $submodule_name: PR branch is BEHIND ${{ inputs.base_ref }} branch" + echo " Submodule needs to be updated to include recent changes from ${{ inputs.base_ref }}" + echo "📊 Missing commits from ${{ inputs.base_ref }} that should be included:" + git log --oneline --graph "$current_commit".."$target_commit" 2>/dev/null || echo " (Unable to show missing commits)" + failed=1 + changed=1 + if [[ -n "$github_repo" && "$github_repo" == https://github.com/* ]]; then + failed_body+="$submodule_name: ❌ PR branch is BEHIND ${{ inputs.base_ref }} branch"$'\n' + failed_body+=" TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/"$'\n' + failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ 
inputs.head_ref }}): $github_repo/commits/$current_commit/"$'\n\n' + fi + + else + # Case 4: Commits have diverged or have no common ancestor + common_ancestor=$(git merge-base "$target_commit" "$current_commit" 2>/dev/null) + + if [ -n "$common_ancestor" ]; then + echo "❌ $submodule_name: Commits have DIVERGED from a common ancestor" + echo " This indicates parallel development - manual merge may be required" + echo "" + echo "📊 Divergence analysis:" + echo " Common ancestor: $common_ancestor" + git log --oneline -1 "$common_ancestor" 2>/dev/null || echo " (Unable to show common ancestor)" + echo "" + echo " For detailed commit history inspection:" + failed=1 + changed=1 + if [[ -n "$github_repo" && "$github_repo" == https://github.com/* ]]; then + echo " TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/" + echo " CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $github_repo/commits/$current_commit/" + failed_body+="$submodule_name: ❌ Commits have DIVERGED from a common ancestor"$'\n' + failed_body+=" TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/"$'\n' + failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $github_repo/commits/$current_commit/"$'\n\n' + else + echo " Repository: $github_repo (unable to generate GitHub URLs)" + echo " TARGET (${{ inputs.base_ref }} branch): $target_commit" + echo " CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $current_commit" + failed_body+="$submodule_name: ❌ Commits have DIVERGED from a common ancestor"$'\n' + failed_body+=" TARGET (${{ inputs.base_ref }} branch): $target_commit"$'\n' + failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $current_commit"$'\n\n' + fi + else + echo "❌ $submodule_name: Commits have NO COMMON ANCESTOR" + echo " This indicates commits are from completely different repositories or history" + echo "" + echo "📊 For detailed commit inspection:" + 
failed=1 + changed=1 + if [[ -n "$github_repo" && "$github_repo" == https://github.com/* ]]; then + echo " TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/" + echo " CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $github_repo/commits/$current_commit/" + failed_body+="$submodule_name: ❌ Commits have NO COMMON ANCESTOR"$'\n' + failed_body+=" TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/"$'\n' + failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $github_repo/commits/$current_commit/"$'\n\n' + else + echo " Repository: $github_repo (unable to generate GitHub URLs)" + echo " TARGET (${{ inputs.base_ref }} branch): $target_commit" + echo " CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $current_commit" + failed_body+="$submodule_name: ❌ Commits have NO COMMON ANCESTOR"$'\n' + failed_body+=" TARGET (${{ inputs.base_ref }} branch): $target_commit"$'\n' + failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $current_commit"$'\n\n' + fi + fi + fi + cd "$GITHUB_WORKSPACE" + + done < <(git submodule status) + + # Set outputs + echo "failed=$failed" >> $GITHUB_OUTPUT + echo "changed=$changed" >> $GITHUB_OUTPUT + if [[ $changed -eq 1 ]]; then + comment_body="" + if [[ -n "$success_body" ]]; then + comment_body+="### ✅ Submodules that are properly updated:"$'\n' + comment_body+="$success_body"$'\n' + fi + if [[ -n "$failed_body" ]]; then + comment_body+="### ❌ Submodules that need attention:"$'\n' + comment_body+="$failed_body" + fi + echo "comment_body<> $GITHUB_OUTPUT + echo "$comment_body" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + fi + + if [[ $failed -eq 1 ]]; then + echo "" + echo "❌ One or more submodules are not fast-forwarded" + echo "Please ensure submodule commits are fast-forwards of the ${{ inputs.base_ref }} branch" + exit 1 + fi + + echo "" + echo "✅ All submodules are properly fast-forwarded" + + 
comment: + name: Comment on PR + needs: [check] + runs-on: ubuntu-latest + if: always() && needs.check.outputs.changed == '1' + steps: + - name: Comment on PR + uses: actions/github-script@v7 + with: + script: | + const failed = '${{ needs.check.outputs.failed }}' === '1'; + const title = failed ? + '## ❌ Submodule Fast-Forward Check Failed' : + '## ✅ Submodule Fast-Forward Check Results'; + + const commentBody = `${title} + + **Check based on commit:** ${{ inputs.head_sha }} (PR #${{ inputs.pr_number }} from \`${{ inputs.head_ref }}\`) + + ${{ needs.check.outputs.comment_body }} + ${failed ? 'Please ensure all submodule commits are fast-forwards of the ${{ inputs.base_ref }} branch before merging.' : 'All submodule changes look good! ✨'}`; + + await github.rest.issues.createComment({ + issue_number: ${{ inputs.pr_number }}, + owner: context.repo.owner, + repo: context.repo.repo, + body: commentBody + }); \ No newline at end of file diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml index 06a2022bbe..e9e504aa7f 100644 --- a/.github/workflows/cicd-main.yml +++ b/.github/workflows/cicd-main.yml @@ -103,6 +103,17 @@ jobs: echo "test_level=$TEST_LEVEL" | tee -a "$GITHUB_OUTPUT" + submodule-check: + name: Check submodule fast-forward + needs: [pre-flight] + if: github.event_name == 'pull_request' + uses: ./.github/workflows/_submodule_check.yml + with: + base_ref: ${{ github.base_ref }} + head_ref: ${{ github.head_ref }} + pr_number: ${{ github.event.number }} + head_sha: ${{ github.event.pull_request.head.sha }} + lint-check: name: Lint check needs: [pre-flight] From 1b972cec09c7c46af7ae7cd6ea1fd0ca0dc5cbc4 Mon Sep 17 00:00:00 2001 From: Jialei Chen Date: Wed, 23 Jul 2025 21:01:11 +0000 Subject: [PATCH 59/59] add uv.lock file Signed-off-by: Jialei Chen --- nemo_rl/experience/rollouts.py | 35 ++- uv.lock | 554 +++++++++++++++++++++++++++++++++ 2 files changed, 578 insertions(+), 11 deletions(-) diff --git a/nemo_rl/experience/rollouts.py 
b/nemo_rl/experience/rollouts.py index d68a5437b9..5aec9ff282 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -19,7 +19,6 @@ import copy from typing import Any -from datetime import datetime import ray import torch from transformers import PreTrainedTokenizerBase @@ -353,8 +352,10 @@ def run_multi_turn_rollout( if len(active_indices) == 0: break - if max_rollout_turns > 0: - print(f"▶ ▶ ▶ Running rollout turn {turn + 1} / {max_rollout_turns} with {len(active_indices)} active samples...") + if max_rollout_turns > 1: + print( + f"▶ ▶ ▶ Running rollout turn {turn + 1} / {max_rollout_turns} with {len(active_indices)} active samples..." + ) active_samples_per_turn.append(len(active_indices)) @@ -415,9 +416,13 @@ def run_multi_turn_rollout( [{"role": env_role, "content": env_obs_content.strip()}], tokenize=False, ).removeprefix("<|begin_of_text|>") - tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] + tokenized_obs = tokenizer( + formatted_obs, return_tensors="pt", add_special_tokens=False + ).input_ids[0] else: - tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] + tokenized_obs = tokenizer( + env_obs_content, return_tensors="pt", add_special_tokens=False + ).input_ids[0] # check if new message overflows max_seq_len if ( len(tokenized_obs) + len(generated_ids[i]) + active_input_lengths[i] @@ -669,13 +674,21 @@ async def run_sample_multi_turn_rollout( # Tokenize environment response env_role = env_output.observations[0]["role"].lower() if env_role in {"user", "assistant", "system"}: - formatted_obs = tokenizer.apply_chat_template( - [{"role": env_role, "content": env_obs_content.strip()}], - tokenize=False, - ).removeprefix("<|begin_of_text|>").strip() - tokenized_obs = tokenizer(formatted_obs, return_tensors="pt", add_special_tokens=False).input_ids[0] + formatted_obs = ( + tokenizer.apply_chat_template( + [{"role": env_role, 
"content": env_obs_content.strip()}], + tokenize=False, + ) + .removeprefix("<|begin_of_text|>") + .strip() + ) + tokenized_obs = tokenizer( + formatted_obs, return_tensors="pt", add_special_tokens=False + ).input_ids[0] else: - tokenized_obs = tokenizer(env_obs_content, return_tensors="pt", add_special_tokens=False).input_ids[0] + tokenized_obs = tokenizer( + env_obs_content, return_tensors="pt", add_special_tokens=False + ).input_ids[0] # Check for sequence length overflow if input_lengths + gen_token_count + len(tokenized_obs) >= max_seq_len: diff --git a/uv.lock b/uv.lock index 1e6d6815b2..5e2546a953 100644 --- a/uv.lock +++ b/uv.lock @@ -244,6 +244,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] +[[package]] +name = "authlib" +version = "1.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/a1/d8d1c6f8bc922c0b87ae0d933a8ed57be1bef6970894ed79c2852a153cd3/authlib-1.6.1.tar.gz", hash = "sha256:4dffdbb1460ba6ec8c17981a4c67af7d8af131231b5a36a88a1e8c80c111cdfd", size = 159988, upload-time = "2025-07-20T07:38:42.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/58/cc6a08053f822f98f334d38a27687b69c6655fb05cd74a7a5e70a2aeed95/authlib-1.6.1-py2.py3-none-any.whl", hash = "sha256:e9d2031c34c6309373ab845afc24168fe9e93dc52d252631f52642f21f5ed06e", size = 239299, upload-time = "2025-07-20T07:38:39.259Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -1227,6 +1239,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = 
"sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" }, ] +[[package]] +name = "google-adk" +version = "1.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "authlib" }, + { name = "click" }, + { name = "fastapi" }, + { name = "google-api-python-client" }, + { name = "google-cloud-aiplatform", extra = ["agent-engines"] }, + { name = "google-cloud-secret-manager" }, + { name = "google-cloud-speech" }, + { name = "google-cloud-storage" }, + { name = "google-genai" }, + { name = "graphviz" }, + { name = "mcp" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-gcp-trace" }, + { name = "opentelemetry-sdk" }, + { name = "pydantic" }, + { name = "python-dateutil" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "sqlalchemy" }, + { name = "starlette" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "tzlocal" }, + { name = "uvicorn" }, + { name = "watchdog" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/66/9f8e03226c6205e4ae9472e19f8470db6ecdadc1cfd2ad683e60bb7a8f95/google_adk-1.7.0.tar.gz", hash = "sha256:1bb86371e794e9ec73e0cc45bca529512862ecb5cd57132e77262b0de3f88dec", size = 1560499, upload-time = "2025-07-16T22:39:54.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/3f/fd6c0780ff34531d3f4cd1551db206a2f9906fadee6d6f0cd3c83fb63521/google_adk-1.7.0-py3-none-any.whl", hash = "sha256:924bb7d65771ae6c03e0ab64d4e3a3a4d0fb02682faeb7735c2be302adade94c", size = 1750509, upload-time = "2025-07-16T22:39:52.825Z" }, +] + [[package]] name = "google-api-core" version = "2.25.1" @@ -1243,6 +1294,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/4b/ead00905132820b623732b175d66354e9d3e69fcf2a5dcdab780664e7896/google_api_core-2.25.1-py3-none-any.whl", hash = 
"sha256:8a2a56c1fef82987a524371f99f3bd0143702fecc670c72e600c1cda6bf8dbb7", size = 160807, upload-time = "2025-06-12T20:52:19.334Z" }, ] +[package.optional-dependencies] +grpc = [ + { name = "grpcio" }, + { name = "grpcio-status" }, +] + +[[package]] +name = "google-api-python-client" +version = "2.177.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-httplib2" }, + { name = "httplib2" }, + { name = "uritemplate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/75/a89cad519fa8910132e3b08571d0e682ae1163643da6f963f1930f3dc788/google_api_python_client-2.177.0.tar.gz", hash = "sha256:9ffd2b57d68f5afa7e6ac64e2c440534eaa056cbb394812a62ff94723c31b50e", size = 13184405, upload-time = "2025-07-23T16:22:46.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/f5/121248e18ca605a11720c81ae1b52a5a8cb690af9f01887c56de23cd9a5a/google_api_python_client-2.177.0-py3-none-any.whl", hash = "sha256:f2f50f11105ab883eb9b6cf38ec54ea5fd4b429249f76444bec90deba5be79b3", size = 13709470, upload-time = "2025-07-23T16:22:44.081Z" }, +] + [[package]] name = "google-auth" version = "2.40.3" @@ -1257,6 +1330,264 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/63/b19553b658a1692443c62bd07e5868adaa0ad746a0751ba62c59568cd45b/google_auth-2.40.3-py2.py3-none-any.whl", hash = "sha256:1370d4593e86213563547f97a92752fc658456fe4514c809544f330fed45a7ca", size = 216137, upload-time = "2025-06-04T18:04:55.573Z" }, ] +[[package]] +name = "google-auth-httplib2" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "httplib2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, 
upload-time = "2023-12-12T17:40:30.722Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" }, +] + +[[package]] +name = "google-cloud-aiplatform" +version = "1.105.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docstring-parser" }, + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "google-cloud-bigquery" }, + { name = "google-cloud-resource-manager" }, + { name = "google-cloud-storage" }, + { name = "google-genai" }, + { name = "packaging" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "shapely" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/8f/77b36b40370af26f3cf5a2bfd5eae57d63bcdaba869e796de2dc56549bc0/google_cloud_aiplatform-1.105.0.tar.gz", hash = "sha256:749c1230826198fa55d7c38774391f1fa57b9cd021a0e6ad1c788f8bca279555", size = 9474048, upload-time = "2025-07-23T16:25:56.816Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/7c/8ea7e03e82172bede182e2227c2c82f2c41f94edce0ce86c4abc5a05c55f/google_cloud_aiplatform-1.105.0-py2.py3-none-any.whl", hash = "sha256:e6fa21bdd2716051c0c1a48353e43b83080426810f7fbfe71aea629b4d0635cb", size = 7880320, upload-time = "2025-07-23T16:25:53.252Z" }, +] + +[package.optional-dependencies] +agent-engines = [ + { name = "cloudpickle" }, + { name = "google-cloud-logging" }, + { name = "google-cloud-trace" }, + { name = "opentelemetry-exporter-gcp-trace" }, + { name = "opentelemetry-sdk" }, + { name = "packaging" }, + { name = "pydantic" }, + { name = "typing-extensions" }, +] + +[[package]] +name = "google-cloud-appengine-logging" +version = "1.6.2" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/ea/85da73d4f162b29d24ad591c4ce02688b44094ee5f3d6c0cc533c2b23b23/google_cloud_appengine_logging-1.6.2.tar.gz", hash = "sha256:4890928464c98da9eecc7bf4e0542eba2551512c0265462c10f3a3d2a6424b90", size = 16587, upload-time = "2025-06-11T22:38:53.525Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/9e/dc1fd7f838dcaf608c465171b1a25d8ce63f9987e2d5c73bda98792097a9/google_cloud_appengine_logging-1.6.2-py3-none-any.whl", hash = "sha256:2b28ed715e92b67e334c6fcfe1deb523f001919560257b25fc8fcda95fd63938", size = 16889, upload-time = "2025-06-11T22:38:52.26Z" }, +] + +[[package]] +name = "google-cloud-audit-log" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/af/53b4ef636e492d136b3c217e52a07bee569430dda07b8e515d5f2b701b1e/google_cloud_audit_log-0.3.2.tar.gz", hash = "sha256:2598f1533a7d7cdd6c7bf448c12e5519c1d53162d78784e10bcdd1df67791bc3", size = 33377, upload-time = "2025-03-17T11:27:59.808Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/74/38a70339e706b174b3c1117ad931aaa0ff0565b599869317a220d1967e1b/google_cloud_audit_log-0.3.2-py3-none-any.whl", hash = "sha256:daaedfb947a0d77f524e1bd2b560242ab4836fe1afd6b06b92f152b9658554ed", size = 32472, upload-time = "2025-03-17T11:27:58.51Z" }, +] + +[[package]] +name = "google-cloud-bigquery" +version = "3.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-resumable-media" }, + { name = "packaging" }, + { name = "python-dateutil" }, + { name = 
"requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/15/ee/fc5e651899abd7b7c631afc270fc668c4d757d27403c8ec2c11f0588f226/google_cloud_bigquery-3.35.0.tar.gz", hash = "sha256:b3db627355303ac52e07548d448d6c6cb87e52d80c88e57599cdd64185f40664", size = 496456, upload-time = "2025-07-16T00:36:44.83Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/2c/663be60fe7c4090d84267a17204fceaa4efd541000325d4f9690f6c6fcdc/google_cloud_bigquery-3.35.0-py3-none-any.whl", hash = "sha256:8c98e304d47c82f1fbba77b2f4c1e6c458474842d713ee117d9c58e61b74a70d", size = 256874, upload-time = "2025-07-16T00:36:43.292Z" }, +] + +[[package]] +name = "google-cloud-core" +version = "2.4.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d6/b8/2b53838d2acd6ec6168fd284a990c76695e84c65deee79c9f3a4276f6b4f/google_cloud_core-2.4.3.tar.gz", hash = "sha256:1fab62d7102844b278fe6dead3af32408b1df3eb06f5c7e8634cbd40edc4da53", size = 35861, upload-time = "2025-03-10T21:05:38.948Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/86/bda7241a8da2d28a754aad2ba0f6776e35b67e37c36ae0c45d49370f1014/google_cloud_core-2.4.3-py2.py3-none-any.whl", hash = "sha256:5130f9f4c14b4fafdff75c79448f9495cfade0d8775facf1b09c3bf67e027f6e", size = 29348, upload-time = "2025-03-10T21:05:37.785Z" }, +] + +[[package]] +name = "google-cloud-logging" +version = "3.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "google-cloud-appengine-logging" }, + { name = "google-cloud-audit-log" }, + { name = "google-cloud-core" }, + { name = "grpc-google-iam-v1" }, + { name = "opentelemetry-api" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/14/9c/d42ecc94f795a6545930e5f846a7ae59ff685ded8bc086648dd2bee31a1a/google_cloud_logging-3.12.1.tar.gz", hash = "sha256:36efc823985055b203904e83e1c8f9f999b3c64270bcda39d57386ca4effd678", size = 289569, upload-time = "2025-04-22T20:50:24.71Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/41/f8a3197d39b773a91f335dee36c92ef26a8ec96efe78d64baad89d367df4/google_cloud_logging-3.12.1-py2.py3-none-any.whl", hash = "sha256:6817878af76ec4e7568976772839ab2c43ddfd18fbbf2ce32b13ef549cd5a862", size = 229466, upload-time = "2025-04-22T20:50:23.294Z" }, +] + +[[package]] +name = "google-cloud-resource-manager" +version = "1.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "grpc-google-iam-v1" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/ca/a4648f5038cb94af4b3942815942a03aa9398f9fb0bef55b3f1585b9940d/google_cloud_resource_manager-1.14.2.tar.gz", hash = "sha256:962e2d904c550d7bac48372607904ff7bb3277e3bb4a36d80cc9a37e28e6eb74", size = 446370, upload-time = "2025-03-17T11:35:56.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/ea/a92631c358da377af34d3a9682c97af83185c2d66363d5939ab4a1169a7f/google_cloud_resource_manager-1.14.2-py3-none-any.whl", hash = "sha256:d0fa954dedd1d2b8e13feae9099c01b8aac515b648e612834f9942d2795a9900", size = 394344, upload-time = "2025-03-17T11:35:54.722Z" }, +] + +[[package]] +name = "google-cloud-secret-manager" +version = "2.24.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "grpc-google-iam-v1" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/58/7a/2fa6735ec693d822fe08a76709c4d95d9b5b4c02e83e720497355039d2ee/google_cloud_secret_manager-2.24.0.tar.gz", hash = "sha256:ce573d40ffc2fb7d01719243a94ee17aa243ea642a6ae6c337501e58fbf642b5", size = 269516, upload-time = "2025-06-05T22:22:22.965Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/af/db1217cae1809e69a4527ee6293b82a9af2a1fb2313ad110c775e8f3c820/google_cloud_secret_manager-2.24.0-py3-none-any.whl", hash = "sha256:9bea1254827ecc14874bc86c63b899489f8f50bfe1442bfb2517530b30b3a89b", size = 218050, upload-time = "2025-06-10T02:02:19.88Z" }, +] + +[[package]] +name = "google-cloud-speech" +version = "2.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/74/9c5a556f8af19cab461058aa15e1409e7afa453ca2383473a24a12801ef7/google_cloud_speech-2.33.0.tar.gz", hash = "sha256:fd08511b5124fdaa768d71a4054e84a5d8eb02531cb6f84f311c0387ea1314ed", size = 389072, upload-time = "2025-06-11T23:56:37.231Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/1d/880342b2541b4bad888ad8ab2ac77d4b5dad25b32a2a1c5f21140c14c8e3/google_cloud_speech-2.33.0-py3-none-any.whl", hash = "sha256:4ba16c8517c24a6abcde877289b0f40b719090504bf06b1adea248198ccd50a5", size = 335681, upload-time = "2025-06-11T23:56:36.026Z" }, +] + +[[package]] +name = "google-cloud-storage" +version = "2.19.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-cloud-core" }, + { name = "google-crc32c" }, + { name = "google-resumable-media" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/76/4d965702e96bb67976e755bed9828fa50306dca003dbee08b67f41dd265e/google_cloud_storage-2.19.0.tar.gz", hash 
= "sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2", size = 5535488, upload-time = "2024-12-05T01:35:06.49Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/94/6db383d8ee1adf45dc6c73477152b82731fa4c4a46d9c1932cc8757e0fd4/google_cloud_storage-2.19.0-py2.py3-none-any.whl", hash = "sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba", size = 131787, upload-time = "2024-12-05T01:35:04.736Z" }, +] + +[[package]] +name = "google-cloud-trace" +version = "1.16.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core", extra = ["grpc"] }, + { name = "google-auth" }, + { name = "proto-plus" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c5/ea/0e42e2196fb2bc8c7b25f081a0b46b5053d160b34d5322e7eac2d5f7a742/google_cloud_trace-1.16.2.tar.gz", hash = "sha256:89bef223a512465951eb49335be6d60bee0396d576602dbf56368439d303cab4", size = 97826, upload-time = "2025-06-12T00:53:02.12Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/96/7a8d271e91effa9ccc2fd7cfd5cf287a2d7900080a475477c2ac0c7a331d/google_cloud_trace-1.16.2-py3-none-any.whl", hash = "sha256:40fb74607752e4ee0f3d7e5fc6b8f6eb1803982254a1507ba918172484131456", size = 103755, upload-time = "2025-06-12T00:53:00.672Z" }, +] + +[[package]] +name = "google-crc32c" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/ae/87802e6d9f9d69adfaedfcfd599266bf386a54d0be058b532d04c794f76d/google_crc32c-1.7.1.tar.gz", hash = "sha256:2bff2305f98846f3e825dbeec9ee406f89da7962accdb29356e4eadc251bd472", size = 14495, upload-time = "2025-03-26T14:29:13.32Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/b7/787e2453cf8639c94b3d06c9d61f512234a82e1d12d13d18584bd3049904/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:2d73a68a653c57281401871dd4aeebbb6af3191dcac751a76ce430df4d403194", size = 30470, upload-time = "2025-03-26T14:34:31.655Z" }, + { url = "https://files.pythonhosted.org/packages/ed/b4/6042c2b0cbac3ec3a69bb4c49b28d2f517b7a0f4a0232603c42c58e22b44/google_crc32c-1.7.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:22beacf83baaf59f9d3ab2bbb4db0fb018da8e5aebdce07ef9f09fce8220285e", size = 30315, upload-time = "2025-03-26T15:01:54.634Z" }, + { url = "https://files.pythonhosted.org/packages/29/ad/01e7a61a5d059bc57b702d9ff6a18b2585ad97f720bd0a0dbe215df1ab0e/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19eafa0e4af11b0a4eb3974483d55d2d77ad1911e6cf6f832e1574f6781fd337", size = 33180, upload-time = "2025-03-26T14:41:32.168Z" }, + { url = "https://files.pythonhosted.org/packages/3b/a5/7279055cf004561894ed3a7bfdf5bf90a53f28fadd01af7cd166e88ddf16/google_crc32c-1.7.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d86616faaea68101195c6bdc40c494e4d76f41e07a37ffdef270879c15fb65", size = 32794, upload-time = "2025-03-26T14:41:33.264Z" }, + { url = "https://files.pythonhosted.org/packages/0f/d6/77060dbd140c624e42ae3ece3df53b9d811000729a5c821b9fd671ceaac6/google_crc32c-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:b7491bdc0c7564fcf48c0179d2048ab2f7c7ba36b84ccd3a3e1c3f7a72d3bba6", size = 33477, upload-time = "2025-03-26T14:29:10.94Z" }, + { url = "https://files.pythonhosted.org/packages/8b/72/b8d785e9184ba6297a8620c8a37cf6e39b81a8ca01bb0796d7cbb28b3386/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:df8b38bdaf1629d62d51be8bdd04888f37c451564c2042d36e5812da9eff3c35", size = 30467, upload-time = "2025-03-26T14:36:06.909Z" }, + { url = "https://files.pythonhosted.org/packages/34/25/5f18076968212067c4e8ea95bf3b69669f9fc698476e5f5eb97d5b37999f/google_crc32c-1.7.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = 
"sha256:e42e20a83a29aa2709a0cf271c7f8aefaa23b7ab52e53b322585297bb94d4638", size = 30309, upload-time = "2025-03-26T15:06:15.318Z" }, + { url = "https://files.pythonhosted.org/packages/92/83/9228fe65bf70e93e419f38bdf6c5ca5083fc6d32886ee79b450ceefd1dbd/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:905a385140bf492ac300026717af339790921f411c0dfd9aa5a9e69a08ed32eb", size = 33133, upload-time = "2025-03-26T14:41:34.388Z" }, + { url = "https://files.pythonhosted.org/packages/c3/ca/1ea2fd13ff9f8955b85e7956872fdb7050c4ace8a2306a6d177edb9cf7fe/google_crc32c-1.7.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b211ddaf20f7ebeec5c333448582c224a7c90a9d98826fbab82c0ddc11348e6", size = 32773, upload-time = "2025-03-26T14:41:35.19Z" }, + { url = "https://files.pythonhosted.org/packages/89/32/a22a281806e3ef21b72db16f948cad22ec68e4bdd384139291e00ff82fe2/google_crc32c-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:0f99eaa09a9a7e642a61e06742856eec8b19fc0037832e03f941fe7cf0c8e4db", size = 33475, upload-time = "2025-03-26T14:29:11.771Z" }, + { url = "https://files.pythonhosted.org/packages/b8/c5/002975aff514e57fc084ba155697a049b3f9b52225ec3bc0f542871dd524/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32d1da0d74ec5634a05f53ef7df18fc646666a25efaaca9fc7dcfd4caf1d98c3", size = 33243, upload-time = "2025-03-26T14:41:35.975Z" }, + { url = "https://files.pythonhosted.org/packages/61/cb/c585282a03a0cea70fcaa1bf55d5d702d0f2351094d663ec3be1c6c67c52/google_crc32c-1.7.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e10554d4abc5238823112c2ad7e4560f96c7bf3820b202660373d769d9e6e4c9", size = 32870, upload-time = "2025-03-26T14:41:37.08Z" }, +] + +[[package]] +name = "google-genai" +version = "1.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "google-auth" }, + { name = "httpx" 
}, + { name = "pydantic" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4a/6e/d9618081990ad7c4907c93fcccacb13081e825ca818e9e18618f91050246/google_genai-1.26.0.tar.gz", hash = "sha256:d7b019ac98ca07888caa6121a953eb65db20f78370d8ae06aec29fb534534dc8", size = 218877, upload-time = "2025-07-16T21:51:46.989Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/7d/201894058552d5ed810930f9483bf6be8650e3d599efab180d0510d0eea1/google_genai-1.26.0-py3-none-any.whl", hash = "sha256:a050de052ee6e68654ba7cdb97028a576ad7108d0ecc9257c69bcc555498e9a2", size = 217693, upload-time = "2025-07-16T21:51:45.797Z" }, +] + +[[package]] +name = "google-resumable-media" +version = "2.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-crc32c" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/5a/0efdc02665dca14e0837b62c8a1a93132c264bd02054a15abb2218afe0ae/google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0", size = 2163099, upload-time = "2024-08-07T22:20:38.555Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/35/b8d3baf8c46695858cb9d8835a53baa1eeb9906ddaf2f728a5f5b640fd1e/google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa", size = 81251, upload-time = "2024-08-07T22:20:36.409Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -1269,6 +1600,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, ] +[package.optional-dependencies] +grpc = 
[ + { name = "grpcio" }, +] + [[package]] name = "graphene" version = "3.4.3" @@ -1347,6 +1683,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, ] +[[package]] +name = "grpc-google-iam-v1" +version = "0.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos", extra = ["grpc"] }, + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/4e/8d0ca3b035e41fe0b3f31ebbb638356af720335e5a11154c330169b40777/grpc_google_iam_v1-0.14.2.tar.gz", hash = "sha256:b3e1fc387a1a329e41672197d0ace9de22c78dd7d215048c4c78712073f7bd20", size = 16259, upload-time = "2025-03-17T11:40:23.586Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/6f/dd9b178aee7835b96c2e63715aba6516a9d50f6bebbd1cc1d32c82a2a6c3/grpc_google_iam_v1-0.14.2-py3-none-any.whl", hash = "sha256:a3171468459770907926d56a440b2bb643eec1d7ba215f48f3ecece42b4d8351", size = 19242, upload-time = "2025-03-17T11:40:22.648Z" }, +] + [[package]] name = "grpcio" version = "1.73.0" @@ -1375,6 +1725,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/35/347db7d2e7674b621afd21b12022e7f48c7b0861b5577134b4e939536141/grpcio-1.73.0-cp313-cp313-win_amd64.whl", hash = "sha256:38cf518cc54cd0c47c9539cefa8888549fcc067db0b0c66a46535ca8032020c4", size = 4335872, upload-time = "2025-06-09T10:04:29.032Z" }, ] +[[package]] +name = "grpcio-status" +version = "1.71.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "grpcio" }, + { name = "protobuf" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/fd/d1/b6e9877fedae3add1afdeae1f89d1927d296da9cf977eca0eb08fb8a460e/grpcio_status-1.71.2.tar.gz", hash = "sha256:c7a97e176df71cdc2c179cd1847d7fc86cca5832ad12e9798d7fed6b7a1aab50", size = 13677, upload-time = "2025-06-28T04:24:05.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/58/317b0134129b556a93a3b0afe00ee675b5657f0155509e22fcb853bafe2d/grpcio_status-1.71.2-py3-none-any.whl", hash = "sha256:803c98cb6a8b7dc6dbb785b1111aed739f241ab5e9da0bba96888aa74704cfd3", size = 14424, upload-time = "2025-06-28T04:23:42.136Z" }, +] + [[package]] name = "gunicorn" version = "23.0.0" @@ -1460,6 +1824,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httplib2" +version = "0.22.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/ad/2371116b22d616c194aa25ec410c9c6c37f23599dcd590502b74db197584/httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81", size = 351116, upload-time = "2023-03-21T22:29:37.214Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854, upload-time = "2023-03-21T22:29:35.683Z" }, +] + [[package]] name = "httptools" version = "0.6.4" @@ -1497,6 +1873,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = 
"sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "httpx-sse" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/fa/66bd985dd0b7c109a3bcb89272ee0bfb7e2b4d06309ad7b38ff866734b2a/httpx_sse-0.4.1.tar.gz", hash = "sha256:8f44d34414bc7b21bf3602713005c5df4917884f76072479b21f68befa4ea26e", size = 12998, upload-time = "2025-06-24T13:21:05.71Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/0a/6269e3473b09aed2dab8aa1a600c70f31f00ae1349bee30658f7e358a159/httpx_sse-0.4.1-py3-none-any.whl", hash = "sha256:cba42174344c3a5b06f255ce65b350880f962d99ead85e776f23c6618a377a37", size = 8054, upload-time = "2025-06-24T13:21:04.772Z" }, +] + [[package]] name = "huggingface-hub" version = "0.33.0" @@ -2117,6 +2502,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1b/92/9a45c91089c3cf690b5badd4be81e392ff086ccca8a1d4e3a08463d8a966/matplotlib-3.10.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4f23ffe95c5667ef8a2b56eea9b53db7f43910fa4a2d5472ae0f72b64deab4d5", size = 8139044, upload-time = "2025-05-08T19:10:44.551Z" }, ] +[[package]] +name = "mcp" +version = "1.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "httpx" }, + { name = "httpx-sse" }, + { name = "jsonschema" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "python-multipart" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "sse-starlette" }, + { name = "starlette" }, + { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/16cef13b2e60d5f865fbc96372efb23dc8b0591f102dd55003b4ae62f9b1/mcp-1.12.1.tar.gz", hash = "sha256:d1d0bdeb09e4b17c1a72b356248bf3baf75ab10db7008ef865c4afbeb0eb810e", size = 425768, upload-time = 
"2025-07-22T16:51:41.66Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b9/04/9a967a575518fc958bda1e34a52eae0c7f6accf3534811914fdaf57b0689/mcp-1.12.1-py3-none-any.whl", hash = "sha256:34147f62891417f8b000c39718add844182ba424c8eb2cea250b4267bda4b08b", size = 158463, upload-time = "2025-07-22T16:51:40.086Z" }, +] + [[package]] name = "mdit-py-plugins" version = "0.4.2" @@ -2481,6 +2888,7 @@ dependencies = [ { name = "colored" }, { name = "datasets" }, { name = "debugpy" }, + { name = "google-adk" }, { name = "hydra-core" }, { name = "math-verify" }, { name = "matplotlib" }, @@ -2560,6 +2968,7 @@ requires-dist = [ { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.7.4.post1" }, { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.7.4.post1" }, { name = "flash-attn", marker = "extra == 'vllm'", specifier = "==2.7.4.post1" }, + { name = "google-adk" }, { name = "hydra-core" }, { name = "math-verify" }, { name = "matplotlib" }, @@ -3140,6 +3549,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a5/3a/2ba85557e8dc024c0842ad22c570418dc02c36cbd1ab4b832a93edf071b8/opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c", size = 65767, upload-time = "2025-06-10T08:54:56.717Z" }, ] +[[package]] +name = "opentelemetry-exporter-gcp-trace" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-cloud-trace" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-resourcedetector-gcp" }, + { name = "opentelemetry-sdk" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c3/15/7556d54b01fb894497f69a98d57faa9caa45ffa59896e0bba6847a7f0d15/opentelemetry_exporter_gcp_trace-1.9.0.tar.gz", hash = "sha256:c3fc090342f6ee32a0cc41a5716a6bb716b4422d19facefcb22dc4c6b683ece8", size = 18568, upload-time = "2025-02-04T19:45:08.185Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c0/cd/6d7fbad05771eb3c2bace20f6360ce5dac5ca751c6f2122853e43830c32e/opentelemetry_exporter_gcp_trace-1.9.0-py3-none-any.whl", hash = "sha256:0a8396e8b39f636eeddc3f0ae08ddb40c40f288bc8c5544727c3581545e77254", size = 13973, upload-time = "2025-02-04T19:44:59.148Z" }, +] + [[package]] name = "opentelemetry-exporter-otlp" version = "1.34.1" @@ -3213,6 +3637,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/ab/4591bfa54e946350ce8b3f28e5c658fe9785e7cd11e9c11b1671a867822b/opentelemetry_proto-1.34.1-py3-none-any.whl", hash = "sha256:eb4bb5ac27f2562df2d6857fc557b3a481b5e298bc04f94cc68041f00cebcbd2", size = 55692, upload-time = "2025-06-10T08:55:14.904Z" }, ] +[[package]] +name = "opentelemetry-resourcedetector-gcp" +version = "1.9.0a0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-sdk" }, + { name = "requests" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/86/f0693998817779802525a5bcc885a3cdb68d05b636bc6faae5c9ade4bee4/opentelemetry_resourcedetector_gcp-1.9.0a0.tar.gz", hash = "sha256:6860a6649d1e3b9b7b7f09f3918cc16b72aa0c0c590d2a72ea6e42b67c9a42e7", size = 20730, upload-time = "2025-02-04T19:45:10.693Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/04/7e33228c88422a5518e1774a836c9ec68f10f51bde0f1d5dd5f3054e612a/opentelemetry_resourcedetector_gcp-1.9.0a0-py3-none-any.whl", hash = "sha256:4e5a0822b0f0d7647b7ceb282d7aa921dd7f45466540bd0a24f954f90db8fde8", size = 20378, upload-time = "2025-02-04T19:45:03.898Z" }, +] + [[package]] name = "opentelemetry-sdk" version = "1.34.1" @@ -3802,6 +4241,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 
1935777, upload-time = "2025-04-23T18:32:25.088Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/85/1ea668bbab3c50071ca613c6ab30047fb36ab0da1b92fa8f17bbc38fd36c/pydantic_settings-2.10.1.tar.gz", hash = "sha256:06f0062169818d0f5524420a360d632d5857b83cffd4d42fe29597807a1614ee", size = 172583, upload-time = "2025-06-24T13:26:46.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235, upload-time = "2025-06-24T13:26:45.485Z" }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.16.1" @@ -4512,6 +4965,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/6d/b4752b044bf94cb802d88a888dc7d288baaf77d7910b7dedda74b5ceea0c/setuptools-79.0.1-py3-none-any.whl", hash = "sha256:e147c0549f27767ba362f9da434eab9c5dc0045d5304feb602a0af001089fc51", size = 1256281, upload-time = "2025-04-23T22:20:56.768Z" }, ] +[[package]] +name = "shapely" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ca/3c/2da625233f4e605155926566c0e7ea8dda361877f48e8b1655e53456f252/shapely-2.1.1.tar.gz", hash = "sha256:500621967f2ffe9642454808009044c21e5b35db89ce69f8a2042c2ffd0e2772", size = 315422, upload-time = "2025-05-19T11:04:41.265Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/64/9544dc07dfe80a2d489060791300827c941c451e2910f7364b19607ea352/shapely-2.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2827365b58bf98efb60affc94a8e01c56dd1995a80aabe4b701465d86dcbba43", size = 1833021, 
upload-time = "2025-05-19T11:04:08.022Z" }, + { url = "https://files.pythonhosted.org/packages/07/aa/fb5f545e72e89b6a0f04a0effda144f5be956c9c312c7d4e00dfddbddbcf/shapely-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a9c551f7fa7f1e917af2347fe983f21f212863f1d04f08eece01e9c275903fad", size = 1643018, upload-time = "2025-05-19T11:04:09.343Z" }, + { url = "https://files.pythonhosted.org/packages/03/46/61e03edba81de729f09d880ce7ae5c1af873a0814206bbfb4402ab5c3388/shapely-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78dec4d4fbe7b1db8dc36de3031767e7ece5911fb7782bc9e95c5cdec58fb1e9", size = 2986417, upload-time = "2025-05-19T11:04:10.56Z" }, + { url = "https://files.pythonhosted.org/packages/1f/1e/83ec268ab8254a446b4178b45616ab5822d7b9d2b7eb6e27cf0b82f45601/shapely-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:872d3c0a7b8b37da0e23d80496ec5973c4692920b90de9f502b5beb994bbaaef", size = 3098224, upload-time = "2025-05-19T11:04:11.903Z" }, + { url = "https://files.pythonhosted.org/packages/f1/44/0c21e7717c243e067c9ef8fa9126de24239f8345a5bba9280f7bb9935959/shapely-2.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2e2b9125ebfbc28ecf5353511de62f75a8515ae9470521c9a693e4bb9fbe0cf1", size = 3925982, upload-time = "2025-05-19T11:04:13.224Z" }, + { url = "https://files.pythonhosted.org/packages/15/50/d3b4e15fefc103a0eb13d83bad5f65cd6e07a5d8b2ae920e767932a247d1/shapely-2.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4b96cea171b3d7f6786976a0520f178c42792897653ecca0c5422fb1e6946e6d", size = 4089122, upload-time = "2025-05-19T11:04:14.477Z" }, + { url = "https://files.pythonhosted.org/packages/bd/05/9a68f27fc6110baeedeeebc14fd86e73fa38738c5b741302408fb6355577/shapely-2.1.1-cp312-cp312-win32.whl", hash = "sha256:39dca52201e02996df02e447f729da97cfb6ff41a03cb50f5547f19d02905af8", size = 1522437, upload-time = "2025-05-19T11:04:16.203Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/e9/a4560e12b9338842a1f82c9016d2543eaa084fce30a1ca11991143086b57/shapely-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:13d643256f81d55a50013eff6321142781cf777eb6a9e207c2c9e6315ba6044a", size = 1703479, upload-time = "2025-05-19T11:04:18.497Z" }, + { url = "https://files.pythonhosted.org/packages/71/8e/2bc836437f4b84d62efc1faddce0d4e023a5d990bbddd3c78b2004ebc246/shapely-2.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3004a644d9e89e26c20286d5fdc10f41b1744c48ce910bd1867fdff963fe6c48", size = 1832107, upload-time = "2025-05-19T11:04:19.736Z" }, + { url = "https://files.pythonhosted.org/packages/12/a2/12c7cae5b62d5d851c2db836eadd0986f63918a91976495861f7c492f4a9/shapely-2.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1415146fa12d80a47d13cfad5310b3c8b9c2aa8c14a0c845c9d3d75e77cb54f6", size = 1642355, upload-time = "2025-05-19T11:04:21.035Z" }, + { url = "https://files.pythonhosted.org/packages/5b/7e/6d28b43d53fea56de69c744e34c2b999ed4042f7a811dc1bceb876071c95/shapely-2.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21fcab88b7520820ec16d09d6bea68652ca13993c84dffc6129dc3607c95594c", size = 2968871, upload-time = "2025-05-19T11:04:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/dd/87/1017c31e52370b2b79e4d29e07cbb590ab9e5e58cf7e2bdfe363765d6251/shapely-2.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5ce6a5cc52c974b291237a96c08c5592e50f066871704fb5b12be2639d9026a", size = 3080830, upload-time = "2025-05-19T11:04:23.997Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fe/f4a03d81abd96a6ce31c49cd8aaba970eaaa98e191bd1e4d43041e57ae5a/shapely-2.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:04e4c12a45a1d70aeb266618d8cf81a2de9c4df511b63e105b90bfdfb52146de", size = 3908961, upload-time = "2025-05-19T11:04:25.702Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/59/7605289a95a6844056a2017ab36d9b0cb9d6a3c3b5317c1f968c193031c9/shapely-2.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6ca74d851ca5264aae16c2b47e96735579686cb69fa93c4078070a0ec845b8d8", size = 4079623, upload-time = "2025-05-19T11:04:27.171Z" }, + { url = "https://files.pythonhosted.org/packages/bc/4d/9fea036eff2ef4059d30247128b2d67aaa5f0b25e9fc27e1d15cc1b84704/shapely-2.1.1-cp313-cp313-win32.whl", hash = "sha256:fd9130501bf42ffb7e0695b9ea17a27ae8ce68d50b56b6941c7f9b3d3453bc52", size = 1521916, upload-time = "2025-05-19T11:04:28.405Z" }, + { url = "https://files.pythonhosted.org/packages/12/d9/6d13b8957a17c95794f0c4dfb65ecd0957e6c7131a56ce18d135c1107a52/shapely-2.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:ab8d878687b438a2f4c138ed1a80941c6ab0029e0f4c785ecfe114413b498a97", size = 1702746, upload-time = "2025-05-19T11:04:29.643Z" }, + { url = "https://files.pythonhosted.org/packages/60/36/b1452e3e7f35f5f6454d96f3be6e2bb87082720ff6c9437ecc215fa79be0/shapely-2.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c062384316a47f776305ed2fa22182717508ffdeb4a56d0ff4087a77b2a0f6d", size = 1833482, upload-time = "2025-05-19T11:04:30.852Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ca/8e6f59be0718893eb3e478141285796a923636dc8f086f83e5b0ec0036d0/shapely-2.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4ecf6c196b896e8f1360cc219ed4eee1c1e5f5883e505d449f263bd053fb8c05", size = 1642256, upload-time = "2025-05-19T11:04:32.068Z" }, + { url = "https://files.pythonhosted.org/packages/ab/78/0053aea449bb1d4503999525fec6232f049abcdc8df60d290416110de943/shapely-2.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb00070b4c4860f6743c600285109c273cca5241e970ad56bb87bef0be1ea3a0", size = 3016614, upload-time = "2025-05-19T11:04:33.7Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/53/36f1b1de1dfafd1b457dcbafa785b298ce1b8a3e7026b79619e708a245d5/shapely-2.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d14a9afa5fa980fbe7bf63706fdfb8ff588f638f145a1d9dbc18374b5b7de913", size = 3093542, upload-time = "2025-05-19T11:04:34.952Z" }, + { url = "https://files.pythonhosted.org/packages/b9/bf/0619f37ceec6b924d84427c88835b61f27f43560239936ff88915c37da19/shapely-2.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b640e390dabde790e3fb947198b466e63223e0a9ccd787da5f07bcb14756c28d", size = 3945961, upload-time = "2025-05-19T11:04:36.32Z" }, + { url = "https://files.pythonhosted.org/packages/93/c9/20ca4afeb572763b07a7997f00854cb9499df6af85929e93012b189d8917/shapely-2.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:69e08bf9697c1b73ec6aa70437db922bafcea7baca131c90c26d59491a9760f9", size = 4089514, upload-time = "2025-05-19T11:04:37.683Z" }, + { url = "https://files.pythonhosted.org/packages/33/6a/27036a5a560b80012a544366bceafd491e8abb94a8db14047b5346b5a749/shapely-2.1.1-cp313-cp313t-win32.whl", hash = "sha256:ef2d09d5a964cc90c2c18b03566cf918a61c248596998a0301d5b632beadb9db", size = 1540607, upload-time = "2025-05-19T11:04:38.925Z" }, + { url = "https://files.pythonhosted.org/packages/ea/f1/5e9b3ba5c7aa7ebfaf269657e728067d16a7c99401c7973ddf5f0cf121bd/shapely-2.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8cb8f17c377260452e9d7720eeaf59082c5f8ea48cf104524d953e5d36d4bdb7", size = 1723061, upload-time = "2025-05-19T11:04:40.082Z" }, +] + [[package]] name = "shellingham" version = "1.5.4" @@ -4740,6 +5228,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/5c/bfd6bd0bf979426d405cc6e71eceb8701b148b16c21d2dc3c261efc61c7b/sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca", size = 44415, upload-time = "2024-12-10T12:05:27.824Z" }, ] +[[package]] +name = "sse-starlette" +version = "2.4.1" 
+source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/07/3e/eae74d8d33e3262bae0a7e023bb43d8bdd27980aa3557333f4632611151f/sse_starlette-2.4.1.tar.gz", hash = "sha256:7c8a800a1ca343e9165fc06bbda45c78e4c6166320707ae30b416c42da070926", size = 18635, upload-time = "2025-07-06T09:41:33.631Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/f1/6c7eaa8187ba789a6dd6d74430307478d2a91c23a5452ab339b6fbe15a08/sse_starlette-2.4.1-py3-none-any.whl", hash = "sha256:08b77ea898ab1a13a428b2b6f73cfe6d0e607a7b4e15b9bb23e4a37b087fd39a", size = 10824, upload-time = "2025-07-06T09:41:32.321Z" }, +] + [[package]] name = "starlette" version = "0.46.2" @@ -4773,6 +5273,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "tenacity" +version = "8.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/4d/6a19536c50b849338fcbe9290d562b52cbdcf30d8963d3588a68a4107df1/tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78", size = 47309, upload-time = "2024-07-05T07:25:31.836Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/3f/8ba87d9e287b9d385a02a7114ddcef61b26f86411e121c9003eb509a1773/tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687", size = 28165, upload-time = "2024-07-05T07:25:29.591Z" }, +] + [[package]] name = "tensorboard" version = "2.19.0" @@ -5225,6 +5734,27 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, ] +[[package]] +name = "tzlocal" +version = "5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" }, +] + +[[package]] +name = "uritemplate" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, +] + [[package]] name = "urllib3" version = "1.26.20" @@ -5399,6 +5929,30 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/52/5f/c44ad7b2a062ca5f4da99ae475cea274c38f6ec37bdaca1b1c653ee87274/wandb-0.20.1-py3-none-win_amd64.whl", hash = "sha256:6d2431652f096b7e394c29a99135a6441c02ed3198b963f0b351a5b5e56aeca0", size = 22518459, upload-time = "2025-06-05T00:00:21.374Z" }, ] +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220, upload-time = "2024-11-01T14:07:13.037Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/ea/3930d07dafc9e286ed356a679aa02d777c06e9bfd1164fa7c19c288a5483/watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948", size = 96471, upload-time = "2024-11-01T14:06:37.745Z" }, + { url = "https://files.pythonhosted.org/packages/12/87/48361531f70b1f87928b045df868a9fd4e253d9ae087fa4cf3f7113be363/watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860", size = 88449, upload-time = "2024-11-01T14:06:39.748Z" }, + { url = "https://files.pythonhosted.org/packages/5b/7e/8f322f5e600812e6f9a31b75d242631068ca8f4ef0582dd3ae6e72daecc8/watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0", size = 89054, upload-time = "2024-11-01T14:06:41.009Z" }, + { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480, upload-time = "2024-11-01T14:06:42.952Z" }, + { url = 
"https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451, upload-time = "2024-11-01T14:06:45.084Z" }, + { url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057, upload-time = "2024-11-01T14:06:47.324Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" }, + { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" }, + { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, +] + [[package]] name = "watchfiles" version = "1.1.0"