diff --git a/dockerfiles/Dockerfile.nemo-rl b/dockerfiles/Dockerfile.nemo-rl
index ff9c74ef59..b339b14eea 100644
--- a/dockerfiles/Dockerfile.nemo-rl
+++ b/dockerfiles/Dockerfile.nemo-rl
@@ -49,7 +49,7 @@
 ENV NEMO_RL_VENV_DIR=/opt/ray_venvs
 
 FROM base AS hermetic
 ARG NEMO_RL_COMMIT
-ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-838475ff8ea8949a3bbb9bc6f222bb0c282ae5ab}
+ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-85eeb8d059b0249cace427dd5dec9573107be224}
 RUN git clone https://github.com/NVIDIA-NeMo/RL.git /opt/NeMo-RL && cd /opt/NeMo-RL && git checkout ${NEMO_RL_COMMIT} && git submodule update --init --recursive
 
diff --git a/nemo_skills/training/nemo_rl/configs/grpo.yaml b/nemo_skills/training/nemo_rl/configs/grpo.yaml
index 894eafd1c0..75f8e20ec6 100644
--- a/nemo_skills/training/nemo_rl/configs/grpo.yaml
+++ b/nemo_skills/training/nemo_rl/configs/grpo.yaml
@@ -43,6 +43,7 @@ policy:
   model_name: ???
   tokenizer:
     name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default
+    chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true
   train_global_batch_size: 512
   train_micro_batch_size: 4
   generation_batch_size: 32 # Only used when generating using HF backend
diff --git a/nemo_skills/training/nemo_rl/configs/sft.yaml b/nemo_skills/training/nemo_rl/configs/sft.yaml
index 7a84981933..20cc35ff8d 100644
--- a/nemo_skills/training/nemo_rl/configs/sft.yaml
+++ b/nemo_skills/training/nemo_rl/configs/sft.yaml
@@ -28,6 +28,7 @@ policy:
   tokenizer:
     name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default
     chat_template: "infer_from_data" ## Can be: null (passthrough), "default" (tokenizer's default), "infer_from_data" (auto-detect from data), or custom jinja2 template
+    chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true
   train_global_batch_size: 32
   train_micro_batch_size: 1
   max_total_sequence_length: 4096