diff --git a/dockerfiles/nemo-rl.patch b/dockerfiles/nemo-rl.patch index 4c59e7fe25..eb5c5c8050 100644 --- a/dockerfiles/nemo-rl.patch +++ b/dockerfiles/nemo-rl.patch @@ -1,8 +1,8 @@ diff --git a/nemo_rl/algorithms/sft.py b/nemo_rl/algorithms/sft.py -index a447510..e987002 100644 +index 6de9ac81..fc232ac6 100644 --- a/nemo_rl/algorithms/sft.py +++ b/nemo_rl/algorithms/sft.py -@@ -138,6 +138,7 @@ def setup( +@@ -137,6 +137,7 @@ def setup( shuffle=data_config["shuffle"], collate_fn=rl_collate_fn, drop_last=True, @@ -10,11 +10,24 @@ index a447510..e987002 100644 ) if last_checkpoint_path is not None: -@@ -152,6 +153,7 @@ def setup( +@@ -151,6 +152,7 @@ def setup( shuffle=False, collate_fn=rl_collate_fn, - drop_last=False, + drop_last=True, + num_workers=10, ) # ========================== +diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py +index 10c8cf33..b5ab26f0 100644 +--- a/nemo_rl/models/policy/megatron_policy_worker.py ++++ b/nemo_rl/models/policy/megatron_policy_worker.py +@@ -586,6 +586,8 @@ class MegatronPolicyWorker: + "https://github.com/NVIDIA/Megatron-LM/blob/1ab876ddc4c1893c76f26d775226a8d1dcdfb3d2/megatron/core/transformer/mlp.py#L174." + ) + model_cfg.apply_rope_fusion = self.cfg["megatron_cfg"]["apply_rope_fusion"] ++ if "layernorm_epsilon" in self.cfg["megatron_cfg"]: ++ model_cfg.layernorm_epsilon = self.cfg["megatron_cfg"]["layernorm_epsilon"] + + checkpoint_config = CheckpointConfig( + save_interval=100,