From 77b03d7e0d5b1eb32330d571c6df5818fbd09e5a Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 4 Apr 2025 17:20:36 -0700 Subject: [PATCH 01/23] make chat template configurable from config, save chat template as attribute of tokenizer Signed-off-by: ashors1 --- examples/configs/sft.yaml | 2 + examples/run_sft.py | 10 ++++- nemo_reinforcer/algorithms/sft.py | 3 ++ nemo_reinforcer/data/hf_datasets/__init__.py | 3 +- .../data/hf_datasets/chat_templates.py | 9 +++++ .../data/hf_datasets/interfaces.py | 39 ------------------- nemo_reinforcer/data/hf_datasets/oasst.py | 10 ++--- .../data/hf_datasets/openmathinstruct2.py | 9 +++-- nemo_reinforcer/data/hf_datasets/squad.py | 13 +++---- nemo_reinforcer/data/interfaces.py | 2 - nemo_reinforcer/data/llm_message_utils.py | 4 +- nemo_reinforcer/models/policy/hf_policy.py | 6 ++- 12 files changed, 47 insertions(+), 63 deletions(-) create mode 100644 nemo_reinforcer/data/hf_datasets/chat_templates.py delete mode 100644 nemo_reinforcer/data/hf_datasets/interfaces.py diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index e4b116a351..78cc1463d4 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -18,6 +18,8 @@ checkpointing: policy: model_name: "meta-llama/Llama-3.2-1B" + tokenizer: + chat_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" train_global_batch_size: 32 train_micro_batch_size: 1 max_total_sequence_length: 1024 diff --git a/examples/run_sft.py b/examples/run_sft.py index 950938b4da..4a35d6a242 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -100,7 +100,15 @@ def setup_data(data_config: DataConfig, policy_config: PolicyConfig): val_dataset = data.formatted_ds["validation"] sft_task_spec = data.task_spec 
+ ## TODO: save tokenizer whenever we save an hf checkpoint tokenizer = AutoTokenizer.from_pretrained(policy_config["model_name"]) + if "chat_template" in policy_config["tokenizer"]: + if policy_config["tokenizer"]["chat_template"].lower() in {"none", "null"}: + tokenizer.chat_template = ( + hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response + ) + else: + tokenizer.chat_template = policy_config["tokenizer"]["chat_template"] train_dataset = AllTaskProcessedDataset( train_dataset, @@ -166,7 +174,7 @@ def main(): checkpointer, sft_save_state, master_config, - ) = setup(config, dataset, val_dataset) + ) = setup(config, dataset, val_dataset, tokenizer) sft_train( policy, train_dataloader, diff --git a/nemo_reinforcer/algorithms/sft.py b/nemo_reinforcer/algorithms/sft.py index 8f9e34f9da..fc327f3915 100644 --- a/nemo_reinforcer/algorithms/sft.py +++ b/nemo_reinforcer/algorithms/sft.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os +from transformers import AutoTokenizer from pathlib import Path from typing import Optional, Tuple, TypedDict @@ -78,6 +79,7 @@ def setup( master_config: MasterConfig, train_dataset: AllTaskProcessedDataset, val_dataset: AllTaskProcessedDataset, + tokenizer: AutoTokenizer, ) -> Tuple[ HfPolicy, RayVirtualCluster, @@ -175,6 +177,7 @@ def setup( policy = HfPolicy( cluster=cluster, config=policy_config, + tokenizer=tokenizer, weights_path=Path(last_checkpoint_path) / "policy.pt" if last_checkpoint_path else None, diff --git a/nemo_reinforcer/data/hf_datasets/__init__.py b/nemo_reinforcer/data/hf_datasets/__init__.py index df1227140d..12d1a0b368 100644 --- a/nemo_reinforcer/data/hf_datasets/__init__.py +++ b/nemo_reinforcer/data/hf_datasets/__init__.py @@ -14,5 +14,6 @@ from nemo_reinforcer.data.hf_datasets.oasst import OasstDataset from nemo_reinforcer.data.hf_datasets.squad import SquadDataset +from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -__all__ = ["OasstDataset", "SquadDataset"] +__all__ = ["OasstDataset", "SquadDataset", "COMMON_CHAT_TEMPLATES"] diff --git a/nemo_reinforcer/data/hf_datasets/chat_templates.py b/nemo_reinforcer/data/hf_datasets/chat_templates.py new file mode 100644 index 0000000000..0454ae68e4 --- /dev/null +++ b/nemo_reinforcer/data/hf_datasets/chat_templates.py @@ -0,0 +1,9 @@ +## a reference to frequently used chat templates for convenience +class COMMON_CHAT_TEMPLATES: + ### simple template which prepends a role header to the content + simple_role_header = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" + + ### passthrough template which just concatenates the content of the messages with no 
special tokens + passthough_prompt_response = ( + "{% for message in messages %}{{ message['content'] }}{% endfor %}" + ) diff --git a/nemo_reinforcer/data/hf_datasets/interfaces.py b/nemo_reinforcer/data/hf_datasets/interfaces.py deleted file mode 100644 index 63be96c5a7..0000000000 --- a/nemo_reinforcer/data/hf_datasets/interfaces.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Dict, Any, Optional -from nemo_reinforcer.data.interfaces import TaskDataSpec - - -class COMMON_CHAT_TEMPLATES: - ### simple template which prepends a role header to the content - simple_role_header = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" - - -class HfDataset: - """Interface for HuggingFace datasets.""" - - formatted_ds: Dict[str, Any] - - def __init__( - self, - dataset_name: str, - custom_template: Optional[ - str - ] = None, ## "None" means use HuggingFace's tokenizer's template - ): - self.task_spec = TaskDataSpec( - task_name=dataset_name, - custom_template=custom_template, - ) diff --git a/nemo_reinforcer/data/hf_datasets/oasst.py 
b/nemo_reinforcer/data/hf_datasets/oasst.py index 4525c0fa92..f4e5acecba 100644 --- a/nemo_reinforcer/data/hf_datasets/oasst.py +++ b/nemo_reinforcer/data/hf_datasets/oasst.py @@ -20,7 +20,8 @@ import copy from dataclasses import dataclass from typing import Optional -from nemo_reinforcer.data.hf_datasets.interfaces import HfDataset, COMMON_CHAT_TEMPLATES + +from nemo_reinforcer.data.interfaces import TaskDataSpec SYSTEM_PROMPT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n\n" @@ -119,10 +120,9 @@ def download_and_process_oasst(output_directory=".", seed=42, split_ratio=0.95): @dataclass -class OasstDataset(HfDataset): +class OasstDataset: def __init__(self, output_dir: str = "."): self.formatted_ds = download_and_process_oasst(output_dir) - super().__init__( - dataset_name="oasst", - custom_template=COMMON_CHAT_TEMPLATES.simple_role_header, + self.task_spec = TaskDataSpec( + task_name="OASST", ) diff --git a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py b/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py index c3c5126263..57f7384568 100644 --- a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py +++ b/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py @@ -14,9 +14,10 @@ from typing import Optional from datasets import load_dataset -from nemo_reinforcer.data.hf_datasets.interfaces import HfDataset from dataclasses import dataclass +from nemo_reinforcer.data.interfaces import TaskDataSpec + def format_math(data): return { @@ -62,7 +63,7 @@ def prepare_openinstructmath2_dataset(split: str = "train_1M", seed=42, test_siz @dataclass -class OpenMathInstruct2Dataset(HfDataset): +class OpenMathInstruct2Dataset: def __init__( self, split: str = "train_1M", seed: int = 42, test_size: float = 0.05 ): @@ -82,6 +83,6 @@ def __init__( split=split, seed=seed, test_size=test_size ) - super().__init__( - dataset_name="OpenMathInstruct-2", + 
self.task_spec = TaskDataSpec( + task_name="OpenMathInstruct-2", ) diff --git a/nemo_reinforcer/data/hf_datasets/squad.py b/nemo_reinforcer/data/hf_datasets/squad.py index 4ba3e87949..3bc257c88a 100644 --- a/nemo_reinforcer/data/hf_datasets/squad.py +++ b/nemo_reinforcer/data/hf_datasets/squad.py @@ -14,7 +14,8 @@ from typing import Optional from datasets import load_dataset -from nemo_reinforcer.data.hf_datasets.interfaces import HfDataset + +from nemo_reinforcer.data.interfaces import TaskDataSpec def format_squad(data): @@ -36,14 +37,10 @@ def format_squad(data): } -class SquadDataset(HfDataset): +class SquadDataset: def __init__(self): original_ds = load_dataset("rajpurkar/squad") self.formatted_ds = original_ds.map(format_squad) - - custom_template = "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" - - super().__init__( - dataset_name="squad", - custom_template=custom_template, + self.task_spec = TaskDataSpec( + task_name="SQuAD", ) diff --git a/nemo_reinforcer/data/interfaces.py b/nemo_reinforcer/data/interfaces.py index ade6f145e8..6ae1152be7 100644 --- a/nemo_reinforcer/data/interfaces.py +++ b/nemo_reinforcer/data/interfaces.py @@ -42,7 +42,6 @@ class TaskDataSpec: prompt_file: Optional[os.PathLike] = None system_prompt_file: Optional[Union[str, os.PathLike]] = None - custom_template: Optional[Union[str, os.PathLike]] = None def __post_init__(self): def load_prompt_file( @@ -66,7 +65,6 @@ def copy_defaults(self, from_spec: "TaskDataSpec"): default_attrs = { "system_prompt": from_spec.system_prompt, "prompt": from_spec.prompt, - "custom_template": from_spec.custom_template, } for attr_name, default_value in default_attrs.items(): diff --git a/nemo_reinforcer/data/llm_message_utils.py 
b/nemo_reinforcer/data/llm_message_utils.py index 93f6ddc50b..b14956cdbd 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -355,12 +355,10 @@ def get_formatted_message_log( """ new_message_log = [] prev_formatted_message = "" - template = task_data_spec.custom_template for i, message in enumerate(message_log): formatted_message = tokenizer.apply_chat_template( message_log[: i + 1], - chat_template=template, add_generation_prompt=False, tokenize=False, add_special_tokens=False, @@ -376,11 +374,13 @@ def get_formatted_message_log( message_chunk = formatted_message[prev_message_len_no_eos:] if tokenizer.bos_token is not None: + ## TODO: make configurable if i == 0 and not message_chunk.startswith(tokenizer.bos_token): message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log) - 1: message_chunk = message_chunk.rstrip("\n") + ## TODO: make configurable if tokenizer.eos_token is not None and not message_chunk.endswith( tokenizer.eos_token ): diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index 3a316ba3ae..ceed86b213 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -65,6 +65,7 @@ def __repr__(self): def __init__( self, config: PolicyConfig, + tokenizer: AutoTokenizer, weights_path: Optional[str] = None, optimizer_path: Optional[str] = None, init_optimizer: bool = True, @@ -97,7 +98,8 @@ def __init__( ) else: self.reference_model = None - self.tokenizer = AutoTokenizer.from_pretrained(model_name) + + self.tokenizer = tokenizer # If no pad token is defined, you might need: if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token @@ -895,6 +897,7 @@ def __init__( self, cluster: RayVirtualCluster, config: PolicyConfig, + tokenizer: AutoTokenizer, name_prefix: str = "hf_policy", workers_per_node: Optional[Union[int, List[int]]] = None, init_optimizer: bool = 
True, @@ -910,6 +913,7 @@ def __init__( worker_builder = RayWorkerBuilder( HfPolicyWorker, config, + tokenizer=tokenizer, init_optimizer=init_optimizer, weights_path=weights_path, optimizer_path=optimizer_path, From 4f171273a095359ca5ed6c09afa782d10f52ef23 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 7 Apr 2025 15:36:46 -0700 Subject: [PATCH 02/23] save hf tokenizer Signed-off-by: ashors1 --- nemo_reinforcer/models/policy/hf_policy.py | 1 + nemo_reinforcer/utils/native_checkpoint.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index f69be02aa7..73c7346440 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -858,6 +858,7 @@ def save_checkpoint( optimizer_path=optimizer_path, save_torch_dist=save_torch_dist, save_hf=save_hf, + tokenizer=self.tokenizer if save_hf else None, ) def load_checkpoint(self, weights_path: str, optimizer_path: Optional[str] = None): diff --git a/nemo_reinforcer/utils/native_checkpoint.py b/nemo_reinforcer/utils/native_checkpoint.py index 6f22ea82fd..fbf13e8fb2 100644 --- a/nemo_reinforcer/utils/native_checkpoint.py +++ b/nemo_reinforcer/utils/native_checkpoint.py @@ -137,6 +137,7 @@ def save_checkpoint( optimizer_path: Optional[str] = None, save_torch_dist: bool = True, save_hf: bool = False, + tokenizer: Optional[Any] = None, ) -> None: """Save a checkpoint of the model and optionally optimizer state. 
@@ -161,6 +162,9 @@ def save_checkpoint( state_dict=model_state_dict, ) + if tokenizer is not None: + tokenizer.save_pretrained(hf_weights_path) + if save_torch_dist: model_state = {"model": ModelState(model)} dcp.save(model_state, checkpoint_id=weights_path) From 525296a30cf7c82b035e1f167a9a71504022e716 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 4 Apr 2025 11:21:05 -0700 Subject: [PATCH 03/23] add sft example with json data Signed-off-by: ashors1 --- examples/run_sft.py | 2 ++ nemo_reinforcer/data/hf_datasets/__init__.py | 3 ++- .../data/hf_datasets/json_dataset.py | 26 +++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 nemo_reinforcer/data/hf_datasets/json_dataset.py diff --git a/examples/run_sft.py b/examples/run_sft.py index bb5b4731c7..6e431c7ac4 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -102,6 +102,8 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): data = hf_datasets.OasstDataset(output_dir="/tmp/open_assistant") elif data_cls == "squad": data = hf_datasets.SquadDataset() + elif data_cls == "json_dataset": + data = hf_datasets.JsonDataset(data_config["data_path"]) else: raise ValueError(f"Unknown dataset class: {data_cls}") print( diff --git a/nemo_reinforcer/data/hf_datasets/__init__.py b/nemo_reinforcer/data/hf_datasets/__init__.py index 12d1a0b368..4eb0b68d37 100644 --- a/nemo_reinforcer/data/hf_datasets/__init__.py +++ b/nemo_reinforcer/data/hf_datasets/__init__.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from nemo_reinforcer.data.hf_datasets.json_dataset import JsonDataset from nemo_reinforcer.data.hf_datasets.oasst import OasstDataset from nemo_reinforcer.data.hf_datasets.squad import SquadDataset from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -__all__ = ["OasstDataset", "SquadDataset", "COMMON_CHAT_TEMPLATES"] +__all__ = ["JsonDataset", "OasstDataset", "SquadDataset", "COMMON_CHAT_TEMPLATES"] diff --git a/nemo_reinforcer/data/hf_datasets/json_dataset.py b/nemo_reinforcer/data/hf_datasets/json_dataset.py new file mode 100644 index 0000000000..1517a4e210 --- /dev/null +++ b/nemo_reinforcer/data/hf_datasets/json_dataset.py @@ -0,0 +1,26 @@ +from datasets import load_dataset +from nemo_reinforcer.data.hf_datasets.interfaces import HfDataset + + +class JsonDataset(HfDataset): + def __init__(self, path: str): + original_dataset = load_dataset("json", data_files=path)["train"] + formatted_dataset = original_dataset.map(self.add_messages_key) + + ## just duplicating the dataset for train and validation for simplicity + self.formatted_ds = { + "train": formatted_dataset, + "validation": formatted_dataset, + } + + super().__init__( + "json_dataset", + ) + + def add_messages_key(self, example): + return { + "messages": [ + {"role": "user", "content": example["input"]}, + {"role": "assistant", "content": example["output"]}, + ] + } From ce04b9bfbba9b7fe8a74f0a8abe371c71548ff1d Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 7 Apr 2025 16:58:30 -0700 Subject: [PATCH 04/23] improved configurability Signed-off-by: ashors1 --- examples/run_sft.py | 12 +++++++-- .../data/hf_datasets/json_dataset.py | 25 +++++++++++++------ nemo_reinforcer/data/llm_message_utils.py | 12 +++++---- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/examples/run_sft.py b/examples/run_sft.py index 6e431c7ac4..f16e50ecfc 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -59,7 +59,11 @@ def sft_preprocessor( ) -> DatumSpec: """Process a 
datum dictionary for SFT training.""" message_log = get_formatted_message_log( - datum_dict["messages"], tokenizer, task_data_spec + datum_dict["messages"], + tokenizer, + task_data_spec, + add_bos_token=True, + add_eos_token=True, ) length = sum(len(m["token_ids"]) for m in message_log) @@ -103,7 +107,11 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): elif data_cls == "squad": data = hf_datasets.SquadDataset() elif data_cls == "json_dataset": - data = hf_datasets.JsonDataset(data_config["data_path"]) + data = hf_datasets.JsonDataset( + data_config["data_path"], + data_config["input_key"], + data_config["output_key"], + ) else: raise ValueError(f"Unknown dataset class: {data_cls}") print( diff --git a/nemo_reinforcer/data/hf_datasets/json_dataset.py b/nemo_reinforcer/data/hf_datasets/json_dataset.py index 1517a4e210..11dc61ba50 100644 --- a/nemo_reinforcer/data/hf_datasets/json_dataset.py +++ b/nemo_reinforcer/data/hf_datasets/json_dataset.py @@ -3,16 +3,27 @@ class JsonDataset(HfDataset): - def __init__(self, path: str): - original_dataset = load_dataset("json", data_files=path)["train"] - formatted_dataset = original_dataset.map(self.add_messages_key) + def __init__( + self, + train_ds_path: str, + val_ds_path: str, + input_key: str = "input", + output_key: str = "output", + ): + train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] + val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] + formatted_train_dataset = train_original_dataset.map(self.add_messages_key) + formatted_val_dataset = val_original_dataset.map(self.add_messages_key) ## just duplicating the dataset for train and validation for simplicity self.formatted_ds = { - "train": formatted_dataset, - "validation": formatted_dataset, + "train": formatted_train_dataset, + "validation": formatted_val_dataset, } + self.input_key = input_key + self.output_key = output_key + super().__init__( "json_dataset", ) @@ -20,7 +31,7 @@ def 
__init__(self, path: str): def add_messages_key(self, example): return { "messages": [ - {"role": "user", "content": example["input"]}, - {"role": "assistant", "content": example["output"]}, + {"role": "user", "content": example[self.input_key]}, + {"role": "assistant", "content": example[self.output_key]}, ] } diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index b14956cdbd..0ffbf30364 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -342,6 +342,8 @@ def get_formatted_message_log( message_log: LLMMessageLogType, tokenizer, task_data_spec: TaskDataSpec, + add_bos_token: bool = True, + add_eos_token: bool = True, ) -> LLMMessageLogType: """Format and tokenize chat messages using the specified template. @@ -373,16 +375,16 @@ def get_formatted_message_log( ## pull out the chunk corresponding to the current message message_chunk = formatted_message[prev_message_len_no_eos:] - if tokenizer.bos_token is not None: - ## TODO: make configurable + if add_bos_token and tokenizer.bos_token is not None: if i == 0 and not message_chunk.startswith(tokenizer.bos_token): message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log) - 1: message_chunk = message_chunk.rstrip("\n") - ## TODO: make configurable - if tokenizer.eos_token is not None and not message_chunk.endswith( - tokenizer.eos_token + if ( + add_eos_token + and tokenizer.eos_token is not None + and not message_chunk.endswith(tokenizer.eos_token) ): message_chunk += tokenizer.eos_token From 1597d0fb124e66ca488c35e188906f71e6924608 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 7 Apr 2025 17:01:10 -0700 Subject: [PATCH 05/23] fixes Signed-off-by: ashors1 --- examples/run_sft.py | 12 +++++---- nemo_reinforcer/data/hf_datasets/__init__.py | 11 ++++++-- .../data/hf_datasets/chat_templates.py | 16 ++++++++++- ..._dataset.py => prompt_response_dataset.py} | 27 ++++++++++++++----- 
nemo_reinforcer/data/llm_message_utils.py | 10 ++++--- nemo_reinforcer/models/policy/__init__.py | 7 ++++- nemo_reinforcer/models/policy/hf_policy.py | 4 +-- 7 files changed, 67 insertions(+), 20 deletions(-) rename nemo_reinforcer/data/hf_datasets/{json_dataset.py => prompt_response_dataset.py} (60%) diff --git a/examples/run_sft.py b/examples/run_sft.py index f16e50ecfc..b5159a9c64 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -29,6 +29,7 @@ from nemo_reinforcer.distributed.virtual_cluster import init_ray from nemo_reinforcer.utils.config import load_config from nemo_reinforcer.utils.logger import get_next_experiment_dir +from nemo_reinforcer.models.policy import TokenizerConfig def parse_args(): @@ -90,7 +91,7 @@ def sft_preprocessor( def setup_tokenizer(tokenizer_config: TokenizerConfig): tokenizer = get_tokenizer(tokenizer_config["name"]) if "chat_template" in tokenizer_config: - if tokenizer_config["chat_template"].lower() in {"none", "null"}: + if tokenizer_config["chat_template"] is None: tokenizer.chat_template = ( hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response ) @@ -106,9 +107,10 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): data = hf_datasets.OasstDataset(output_dir="/tmp/open_assistant") elif data_cls == "squad": data = hf_datasets.SquadDataset() - elif data_cls == "json_dataset": - data = hf_datasets.JsonDataset( - data_config["data_path"], + elif data_cls == "prompt_response_dataset": + data = hf_datasets.PromptResponseDataset( + data_config["train_data_path"], + data_config["val_data_path"], data_config["input_key"], data_config["output_key"], ) @@ -173,7 +175,7 @@ def main(): init_ray() # setup tokenizer - tokenizer = setup_tokenizer(config["tokenizer"]) + tokenizer = setup_tokenizer(config["policy"]["tokenizer"]) # setup data ( diff --git a/nemo_reinforcer/data/hf_datasets/__init__.py b/nemo_reinforcer/data/hf_datasets/__init__.py index 4eb0b68d37..919f1a494e 100644 --- 
a/nemo_reinforcer/data/hf_datasets/__init__.py +++ b/nemo_reinforcer/data/hf_datasets/__init__.py @@ -12,9 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_reinforcer.data.hf_datasets.json_dataset import JsonDataset +from nemo_reinforcer.data.hf_datasets.prompt_response_dataset import ( + PromptResponseDataset, +) from nemo_reinforcer.data.hf_datasets.oasst import OasstDataset from nemo_reinforcer.data.hf_datasets.squad import SquadDataset from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -__all__ = ["JsonDataset", "OasstDataset", "SquadDataset", "COMMON_CHAT_TEMPLATES"] +__all__ = [ + "OasstDataset", + "PromptResponseDataset", + "SquadDataset", + "COMMON_CHAT_TEMPLATES", +] diff --git a/nemo_reinforcer/data/hf_datasets/chat_templates.py b/nemo_reinforcer/data/hf_datasets/chat_templates.py index 0454ae68e4..8e7ad957a6 100644 --- a/nemo_reinforcer/data/hf_datasets/chat_templates.py +++ b/nemo_reinforcer/data/hf_datasets/chat_templates.py @@ -1,9 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ ## a reference to frequently used chat templates for convenience class COMMON_CHAT_TEMPLATES: ### simple template which prepends a role header to the content simple_role_header = "{% for message in messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}" ### passthrough template which just concatenates the content of the messages with no special tokens - passthough_prompt_response = ( + passthrough_prompt_response = ( "{% for message in messages %}{{ message['content'] }}{% endfor %}" ) diff --git a/nemo_reinforcer/data/hf_datasets/json_dataset.py b/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py similarity index 60% rename from nemo_reinforcer/data/hf_datasets/json_dataset.py rename to nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py index 11dc61ba50..c9ff14d348 100644 --- a/nemo_reinforcer/data/hf_datasets/json_dataset.py +++ b/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py @@ -1,8 +1,22 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from datasets import load_dataset -from nemo_reinforcer.data.hf_datasets.interfaces import HfDataset +from nemo_reinforcer.data.interfaces import TaskDataSpec -class JsonDataset(HfDataset): +class PromptResponseDataset: def __init__( self, train_ds_path: str, @@ -12,6 +26,10 @@ def __init__( ): train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] + + self.input_key = input_key + self.output_key = output_key + formatted_train_dataset = train_original_dataset.map(self.add_messages_key) formatted_val_dataset = val_original_dataset.map(self.add_messages_key) @@ -21,10 +39,7 @@ def __init__( "validation": formatted_val_dataset, } - self.input_key = input_key - self.output_key = output_key - - super().__init__( + self.task_spec = TaskDataSpec( "json_dataset", ) diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index 0ffbf30364..e88e0ab9d9 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -375,17 +375,21 @@ def get_formatted_message_log( ## pull out the chunk corresponding to the current message message_chunk = formatted_message[prev_message_len_no_eos:] - if add_bos_token and tokenizer.bos_token is not None: - if i == 0 and not message_chunk.startswith(tokenizer.bos_token): + if i == 0: + if ( + add_bos_token + and tokenizer.bos_token is not None + and not message_chunk.startswith(tokenizer.bos_token) + ): message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log) - 1: - message_chunk = message_chunk.rstrip("\n") if ( add_eos_token and tokenizer.eos_token is not None and not message_chunk.endswith(tokenizer.eos_token) ): + message_chunk = message_chunk.rstrip("\n") message_chunk += tokenizer.eos_token new_message = message.copy() diff --git a/nemo_reinforcer/models/policy/__init__.py b/nemo_reinforcer/models/policy/__init__.py index 
24390b9670..43c9422e2a 100644 --- a/nemo_reinforcer/models/policy/__init__.py +++ b/nemo_reinforcer/models/policy/__init__.py @@ -17,9 +17,14 @@ from nemo_reinforcer.models.generation.interfaces import GenerationConfig +class TokenizerConfig(TypedDict): + name: str + chat_template: str + + class PolicyConfig(TypedDict): model_name: str - tokenizer_name: str + tokenizer: TokenizerConfig train_global_batch_size: int train_micro_batch_size: int learning_rate: float diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index 73c7346440..d7a0a50b0d 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -26,7 +26,7 @@ MixedPrecision, ) from torch.distributed.fsdp.wrap import size_based_auto_wrap_policy -from transformers import AutoModelForCausalLM +from transformers import AutoModelForCausalLM, AutoTokenizer from nemo_reinforcer.algorithms.interfaces import LossFunction from nemo_reinforcer.algorithms.utils import get_tokenizer @@ -80,7 +80,7 @@ def __init__( rank = torch.distributed.get_rank() world_size = torch.distributed.get_world_size() model_name = self.cfg["model_name"] - tokenizer_name = self.cfg["tokenizer_name"] + tokenizer_name = self.cfg["tokenizer"]["name"] if self.cfg["precision"] == "float32": self.dtype = torch.float32 elif self.cfg["precision"] == "bfloat16": From 70f11a3f563d0f10d11364b3329ddea73f8d4789 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 7 Apr 2025 22:19:39 -0700 Subject: [PATCH 06/23] update grpo and clean up Signed-off-by: ashors1 --- examples/configs/grpo_math_1B.yaml | 3 ++- examples/configs/grpo_math_8B.yaml | 3 ++- examples/run_grpo_math.py | 4 +--- examples/run_sft.py | 17 ++--------------- nemo_reinforcer/algorithms/grpo.py | 1 + nemo_reinforcer/algorithms/sft.py | 2 +- nemo_reinforcer/algorithms/utils.py | 14 ++++++++++++-- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/examples/configs/grpo_math_1B.yaml 
b/examples/configs/grpo_math_1B.yaml index 3d8fdfce43..d781b90414 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -25,7 +25,8 @@ checkpointing: policy: model_name: "meta-llama/Llama-3.2-1B-Instruct" - tokenizer_name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default train_global_batch_size: 512 train_micro_batch_size: 4 generation_batch_size: 32 # Only used when generating using HF backend diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index f2e0576fbc..64e2f2aff5 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -7,7 +7,8 @@ grpo: policy: model_name: "meta-llama/Llama-3.1-8B-Instruct" - tokenizer_name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default train_global_batch_size: 512 train_micro_batch_size: 1 generation_batch_size: 32 # Only used when generating using HF backend diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index c6dea5609c..e147d167f9 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -64,7 +64,6 @@ def openinstructmath2_data_processor( problem = user_message[0]["content"] extra_env_info = {"ground_truth": user_message[1]["content"]} - template = task_data_spec.custom_template message_log: LLMMessageLogType = [] user_message = { "role": "user", @@ -72,7 +71,6 @@ def openinstructmath2_data_processor( } message = tokenizer.apply_chat_template( [user_message], - chat_template=template, tokenize=False, add_generation_prompt=True, add_special_tokens=False, @@ -254,7 +252,7 @@ def main(): init_ray() # setup tokenizer - tokenizer = 
get_tokenizer(config["policy"]["model_name"]) + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) config["policy"]["generation"] = configure_generation_config( config["policy"]["generation"], tokenizer ) diff --git a/examples/run_sft.py b/examples/run_sft.py index b5159a9c64..be56330c9a 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -29,7 +29,6 @@ from nemo_reinforcer.distributed.virtual_cluster import init_ray from nemo_reinforcer.utils.config import load_config from nemo_reinforcer.utils.logger import get_next_experiment_dir -from nemo_reinforcer.models.policy import TokenizerConfig def parse_args(): @@ -88,18 +87,6 @@ def sft_preprocessor( return output -def setup_tokenizer(tokenizer_config: TokenizerConfig): - tokenizer = get_tokenizer(tokenizer_config["name"]) - if "chat_template" in tokenizer_config: - if tokenizer_config["chat_template"] is None: - tokenizer.chat_template = ( - hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response - ) - else: - tokenizer.chat_template = tokenizer_config["chat_template"] - return tokenizer - - def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): print("\n▶ Setting up data...") data_cls = data_config["dataset_name"] @@ -175,7 +162,7 @@ def main(): init_ray() # setup tokenizer - tokenizer = setup_tokenizer(config["policy"]["tokenizer"]) + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) # setup data ( @@ -194,7 +181,7 @@ def main(): checkpointer, sft_save_state, master_config, - ) = setup(config, dataset, val_dataset, tokenizer) + ) = setup(config, tokenizer, dataset, val_dataset) sft_train( policy, train_dataloader, diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index 0eda853375..a89a29fa97 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -236,6 +236,7 @@ def setup( policy = HfPolicy( cluster=cluster, config=policy_config, + tokenizer=tokenizer, weights_path=Path(last_checkpoint_path) / "policy" 
/ "weights" if last_checkpoint_path else None, diff --git a/nemo_reinforcer/algorithms/sft.py b/nemo_reinforcer/algorithms/sft.py index 76ec5777a4..0bb42bd27a 100644 --- a/nemo_reinforcer/algorithms/sft.py +++ b/nemo_reinforcer/algorithms/sft.py @@ -77,9 +77,9 @@ class MasterConfig(TypedDict): # ======================================================= def setup( master_config: MasterConfig, + tokenizer: AutoTokenizer, train_dataset: AllTaskProcessedDataset, val_dataset: AllTaskProcessedDataset, - tokenizer: AutoTokenizer, ) -> Tuple[ HfPolicy, RayVirtualCluster, diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_reinforcer/algorithms/utils.py index a3c42e2a19..6441d280ff 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_reinforcer/algorithms/utils.py @@ -20,6 +20,9 @@ from torch.masked import as_masked_tensor from transformers import AutoTokenizer +from nemo_reinforcer.data import hf_datasets +from nemo_reinforcer.models.policy import TokenizerConfig + def calculate_kl_penalty_joschu2020( logprobs_policy: torch.Tensor, logprobs_reference: torch.Tensor @@ -133,9 +136,16 @@ def set_seed(seed: int): torch.cuda.manual_seed_all(seed) -def get_tokenizer(model_name: str) -> AutoTokenizer: +def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: """Get the tokenizer and set pad token to eos token if it is not already set.""" - tokenizer = AutoTokenizer.from_pretrained(model_name) + tokenizer = AutoTokenizer.from_pretrained(tokenizer_config["name"]) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token + if "chat_template" in tokenizer_config: + if tokenizer_config["chat_template"] is None: + tokenizer.chat_template = ( + hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response + ) + else: + tokenizer.chat_template = tokenizer_config["chat_template"] return tokenizer From 83552a65ceef8c927e0fcf6aa5bc5f8b4488e2ed Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 7 Apr 2025 22:55:39 -0700 Subject: [PATCH 07/23] fix unit 
tests Signed-off-by: ashors1 --- nemo_reinforcer/data/llm_message_utils.py | 2 +- .../models/generation/test_vllm_generation.py | 15 ++++++------- .../unit/models/policy/test_hf_ray_policy.py | 21 ++++++++++++------- tests/unit/utils/test_native_checkpoint.py | 14 ++++++++----- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index e88e0ab9d9..e7ba160c1c 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -384,12 +384,12 @@ def get_formatted_message_log( message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log) - 1: + message_chunk = message_chunk.rstrip("\n") if ( add_eos_token and tokenizer.eos_token is not None and not message_chunk.endswith(tokenizer.eos_token) ): - message_chunk = message_chunk.rstrip("\n") message_chunk += tokenizer.eos_token new_message = message.copy() diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index ba1bade3fc..0ae1c2e9ad 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -29,7 +29,9 @@ basic_vllm_test_config: VllmConfig = { "backend": "vllm", "model_name": "meta-llama/Llama-3.2-1B", # Small model for testing - "tokenizer_name": "meta-llama/Llama-3.2-1B", + "tokenizer": { + "name": "meta-llama/Llama-3.2-1B", + }, "dtype": "bfloat16", "max_new_tokens": 10, "temperature": 1.0, @@ -70,8 +72,7 @@ def cluster(): @pytest.fixture(scope="function") def tokenizer(): """Initialize tokenizer for the test model.""" - model_name = basic_vllm_test_config["model_name"] - tokenizer = get_tokenizer(model_name) + tokenizer = get_tokenizer(basic_vllm_test_config["tokenizer"]) return tokenizer @@ -205,7 +206,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer): # Create HF-specific config with required parameters hf_config 
= { "model_name": basic_vllm_test_config["model_name"], - "tokenizer_name": basic_vllm_test_config["tokenizer_name"], + "tokenizer": basic_vllm_test_config["tokenizer"], # Required training parameters "train_global_batch_size": 4, "train_micro_batch_size": 1, @@ -276,7 +277,7 @@ def test_vllm_generation_with_hf_training(cluster, tokenizer): vllm_policy.finish_generation() print("Creating HF policy...") - hf_policy = HfPolicy(cluster, hf_config) + hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") ipc_handles = hf_policy.get_weights_ipc_handles() @@ -509,7 +510,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( hf_config = { "model_name": basic_vllm_test_config["model_name"], - "tokenizer_name": "meta-llama/Llama-3.2-1B", + "tokenizer": basic_vllm_test_config["tokenizer"], "train_global_batch_size": 1, "train_micro_batch_size": 1, "learning_rate": 1e-6, @@ -525,7 +526,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( hf_policy = None try: print(f"Creating HF policy for TP={tensor_parallel_size}...") - hf_policy = HfPolicy(cluster, hf_config) + hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"Creating vLLM policy for TP={tensor_parallel_size}...") vllm_policy = VllmGeneration(cluster, vllm_config) diff --git a/tests/unit/models/policy/test_hf_ray_policy.py b/tests/unit/models/policy/test_hf_ray_policy.py index 7cde591049..6753b04c5a 100644 --- a/tests/unit/models/policy/test_hf_ray_policy.py +++ b/tests/unit/models/policy/test_hf_ray_policy.py @@ -28,7 +28,7 @@ basic_llama_test_config: PolicyConfig = { "model_name": "meta-llama/Llama-3.2-1B", - "tokenizer_name": "meta-llama/Llama-3.2-1B", + "tokenizer": {"name": "meta-llama/Llama-3.2-1B"}, "generation_batch_size": 1, # Small batch size for testing "train_global_batch_size": 4, "train_micro_batch_size": 1, @@ -71,8 +71,7 @@ def gc_collect(): @pytest.fixture(scope="function") def tokenizer(): """Initialize tokenizer for the test model.""" - model_name = 
basic_llama_test_config["model_name"] - tokenizer = get_tokenizer(model_name) + tokenizer = get_tokenizer(basic_llama_test_config["tokenizer"]) return tokenizer @@ -97,7 +96,7 @@ def policy_setup(tokenizer): config["generation"] = configure_generation_config(config["generation"], tokenizer) print("Creating HfPolicy...") - policy = HfPolicy(cluster=cluster, config=config) + policy = HfPolicy(cluster=cluster, config=config, tokenizer=tokenizer) yield policy, cluster @@ -189,7 +188,7 @@ def test_hf_policy_init(policy_setup): @pytest.fixture -def training_setup(): +def training_setup(tokenizer): """Setup and teardown specifically for training tests.""" policy = None cluster = None @@ -212,7 +211,12 @@ def training_setup(): config = basic_llama_test_config print("Creating training HfPolicy...") - policy = HfPolicy(cluster=cluster, config=config, init_reference_model=False) + policy = HfPolicy( + cluster=cluster, + config=config, + init_reference_model=False, + tokenizer=tokenizer, + ) # Create a test batch print("Creating test batch...") @@ -315,7 +319,10 @@ def generation_setup(request, tokenizer): print("Creating generation HfPolicy...") policy = HfPolicy( - cluster=cluster, config=config, init_reference_model=request.param + cluster=cluster, + config=config, + tokenizer=tokenizer, + init_reference_model=request.param, ) # Create a test batch diff --git a/tests/unit/utils/test_native_checkpoint.py b/tests/unit/utils/test_native_checkpoint.py index 8f71badea1..aa913f8a97 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -17,6 +17,7 @@ import torch from tempfile import TemporaryDirectory +from nemo_reinforcer.algorithms.utils import get_tokenizer from nemo_reinforcer.distributed.batched_data_dict import BatchedDataDict from nemo_reinforcer.distributed.virtual_cluster import RayVirtualCluster from nemo_reinforcer.models.policy.hf_policy import HfPolicy @@ -31,7 +32,9 @@ # Define basic test config 
simple_policy_config = { "model_name": "meta-llama/Llama-3.2-1B", # "hf-internal-testing/tiny-random-Gemma3ForCausalLM", - "tokenizer_name": "meta-llama/Llama-3.2-1B", # "hf-internal-testing/tiny-random-Gemma3ForCausalLM", + "tokenizer": { + "name": "meta-llama/Llama-3.2-1B", + }, "train_global_batch_size": 32, "train_micro_batch_size": 1, "logprob_batch_size": 1, @@ -75,10 +78,7 @@ def cluster(): @pytest.fixture(scope="function") def tokenizer(): """Initialize tokenizer for the test model.""" - tokenizer_name = simple_policy_config["tokenizer_name"] - tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True) - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token + tokenizer = get_tokenizer(simple_policy_config["tokenizer"]) return tokenizer @@ -87,6 +87,7 @@ def policy(cluster, tokenizer): """Initialize the policy.""" return HfPolicy( cluster=cluster, + tokenizer=tokenizer, config=simple_policy_config, init_optimizer=False, init_reference_model=False, @@ -273,6 +274,9 @@ def test_save_and_load_hf_checkpoint(policy): "model-00001-of-00002.safetensors", "model-00002-of-00002.safetensors", "model.safetensors.index.json", + "tokenizer_config.json", + "tokenizer.json", + "special_tokens_map.json", } coverted_model = AutoModelForCausalLM.from_pretrained( From 23f06d278cd54e19c8906ad15e0ef2ff0013efdf Mon Sep 17 00:00:00 2001 From: ashors1 Date: Wed, 9 Apr 2025 15:08:15 -0700 Subject: [PATCH 08/23] address comments Signed-off-by: ashors1 --- examples/run_sft.py | 19 +++++++++++++++---- nemo_reinforcer/data/__init__.py | 2 ++ nemo_reinforcer/data/hf_datasets/oasst.py | 1 - .../data/hf_datasets/openmathinstruct2.py | 1 - 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/examples/run_sft.py b/examples/run_sft.py index be56330c9a..1b812b71a2 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -15,6 +15,7 @@ import argparse import os import pprint +from functools import partial from typing import Dict, Any 
from omegaconf import OmegaConf @@ -56,14 +57,16 @@ def sft_preprocessor( tokenizer, max_seq_length: int, idx: int, + add_bos: bool = True, + add_eos: bool = True, ) -> DatumSpec: """Process a datum dictionary for SFT training.""" message_log = get_formatted_message_log( datum_dict["messages"], tokenizer, task_data_spec, - add_bos_token=True, - add_eos_token=True, + add_bos_token=add_bos, + add_eos_token=add_eos, ) length = sum(len(m["token_ids"]) for m in message_log) @@ -115,7 +118,11 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): train_dataset, tokenizer, sft_task_spec, - sft_preprocessor, + partial( + sft_preprocessor, + add_bos=data_config.get("add_bos", True), + add_eos=data_config.get("add_eos", True), + ), max_seq_length=data_config["max_input_seq_length"], ) @@ -123,7 +130,11 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): val_dataset, tokenizer, sft_task_spec, - sft_preprocessor, + partial( + sft_preprocessor, + add_bos=data_config.get("add_bos", True), + add_eos=data_config.get("add_eos", True), + ), max_seq_length=data_config["max_input_seq_length"], ) diff --git a/nemo_reinforcer/data/__init__.py b/nemo_reinforcer/data/__init__.py index 09eaf35fb5..eba1eaf7ce 100644 --- a/nemo_reinforcer/data/__init__.py +++ b/nemo_reinforcer/data/__init__.py @@ -21,6 +21,8 @@ class DataConfig(TypedDict): system_prompt_file: Optional[str] dataset_name: str val_dataset_name: Optional[str] + add_bos: Optional[bool] + add_eos: Optional[bool] class MathDataConfig(DataConfig): diff --git a/nemo_reinforcer/data/hf_datasets/oasst.py b/nemo_reinforcer/data/hf_datasets/oasst.py index f4e5acecba..3d10c46e37 100644 --- a/nemo_reinforcer/data/hf_datasets/oasst.py +++ b/nemo_reinforcer/data/hf_datasets/oasst.py @@ -119,7 +119,6 @@ def download_and_process_oasst(output_directory=".", seed=42, split_ratio=0.95): return formatted_ds -@dataclass class OasstDataset: def __init__(self, output_dir: str = "."): self.formatted_ds = 
download_and_process_oasst(output_dir) diff --git a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py b/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py index 57f7384568..b5a0bfa1bc 100644 --- a/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py +++ b/nemo_reinforcer/data/hf_datasets/openmathinstruct2.py @@ -62,7 +62,6 @@ def prepare_openinstructmath2_dataset(split: str = "train_1M", seed=42, test_siz } -@dataclass class OpenMathInstruct2Dataset: def __init__( self, split: str = "train_1M", seed: int = 42, test_size: float = 0.05 From 58fe349fce573c600dc35bbb4bd0e5748fa1dd6c Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 11:12:11 -0700 Subject: [PATCH 09/23] add unit tests and documentation Signed-off-by: ashors1 --- nemo_reinforcer/algorithms/utils.py | 58 +++++++++++++- tests/unit/algorithms/test_utils.py | 115 ++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+), 1 deletion(-) create mode 100755 tests/unit/algorithms/test_utils.py diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_reinforcer/algorithms/utils.py index 6441d280ff..66d7c0e0c2 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_reinforcer/algorithms/utils.py @@ -137,15 +137,71 @@ def set_seed(seed: int): def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: - """Get the tokenizer and set pad token to eos token if it is not already set.""" + """Get the tokenizer and set pad token to eos token if it is not already set. + + This function initializes a tokenizer from the Hugging Face transformers library + and configures it with appropriate chat templates and padding tokens. + + Args: + tokenizer_config: A dictionary containing tokenizer configuration. + Required keys: + - name: The name or path of the pretrained tokenizer + Optional keys: + - chat_template: The chat template to use. 
Can be: + - None: Uses a passthrough template that just returns message content + - "default": Uses the tokenizer's default template + - A custom jinja2 template string + If not specified, the tokenizer's default template will be used. + + Returns: + AutoTokenizer: The configured tokenizer instance + + Examples: + >>> from transformers import AutoTokenizer + >>> from nemo_reinforcer.models.policy import TokenizerConfig + >>> # not specifying a chat template uses the tokenizer's default + >>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct"} + >>> tokenizer = get_tokenizer(config) + >>> messages = [ + ... {"role": "system", "content": "You are a helpful AI assistant."}, + ... {"role": "user", "content": "Hello!"} + ... ] + >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + >>> assert formatted == AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct").apply_chat_template(messages, tokenize=False) + + >>> # Using a passthrough template + >>> config = { + ... "name": "meta-llama/Llama-3.2-1B-Instruct", + ... "chat_template": None + ... } + >>> tokenizer = get_tokenizer(config) + >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + >>> assert formatted == "".join(msg["content"] for msg in messages) + + >>> # Using a custom template + >>> config = { + ... "name": "meta-llama/Llama-3.2-1B-Instruct", + ... "chat_template": "{% for message in messages %}{{ ' START: ' + message['content'] + ' END.' }}{% endfor %}" + ... } + >>> tokenizer = get_tokenizer(config) + >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + >>> assert formatted == " START: You are a helpful AI assistant. END. START: Hello! END." 
+ """ tokenizer = AutoTokenizer.from_pretrained(tokenizer_config["name"]) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token if "chat_template" in tokenizer_config: if tokenizer_config["chat_template"] is None: + print("Using passthrough chat template") tokenizer.chat_template = ( hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response ) + elif tokenizer_config["chat_template"].lower() == "default": + print("Using tokenizer's default chat template") else: + print("Using custom chat template") tokenizer.chat_template = tokenizer_config["chat_template"] + else: + print("No chat template provided, using tokenizer's default") + return tokenizer diff --git a/tests/unit/algorithms/test_utils.py b/tests/unit/algorithms/test_utils.py new file mode 100755 index 0000000000..ce517a2b3a --- /dev/null +++ b/tests/unit/algorithms/test_utils.py @@ -0,0 +1,115 @@ +import pytest +from datetime import datetime +from transformers import AutoTokenizer +from nemo_reinforcer.algorithms.utils import get_tokenizer +from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES + + +@pytest.fixture +def conversation_messages(): + """Fixture providing a multi-turn conversation for testing chat templates""" + return [ + {"role": "system", "content": "You are a helpful AI assistant."}, + {"role": "user", "content": "What's the weather like today?"}, + { + "role": "assistant", + "content": "I don't have access to real-time weather data.", + }, + {"role": "user", "content": "Can you help me with something else then?"}, + {"role": "assistant", "content": "Of course! 
What would you like help with?"}, + ] + + +def get_expected_llama_format(messages): + """Generate the expected output format for Llama's chat template""" + # Extract the date from the formatted output + # Get current date + current_date = datetime.now() + formatted_date = current_date.strftime("%d %b %Y") + + # Extract system message if present + if messages[0]["role"] == "system": + system_message = messages[0]["content"].strip() + messages = messages[1:] + else: + system_message = "" + + # Start with BOS token and system header + expected = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n" + expected += "Cutting Knowledge Date: December 2023\n" + expected += f"Today Date: {formatted_date}\n\n" + expected += f"{system_message}<|eot_id|>" + + # Add each message + for message in messages: + if message["role"] not in ["ipython", "tool"]: + expected += f"<|start_header_id|>{message['role']}<|end_header_id|>\n\n" + expected += f"{message['content'].strip()}<|eot_id|>" + + return expected + + +def get_format_with_simple_role_header(messages): + message = "<|begin_of_text|>" + for msg in messages: + message += ( + "<|start_header_id|>" + + msg["role"] + + "<|end_header_id|>\n\n" + + msg["content"].strip() + + "<|eot_id|>" + ) + return message + + +def test_get_tokenizer_no_chat_template(conversation_messages): + """Test get_tokenizer when no chat template is specified in config""" + config = {"name": "meta-llama/Llama-3.2-1B-Instruct"} + tokenizer = get_tokenizer(config) + + # Verify that the tokenizer's default template is used + formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) + + expected = get_expected_llama_format(conversation_messages) + assert formatted == expected + + +def test_get_tokenizer_default_chat_template(conversation_messages): + """Test get_tokenizer when chat_template is 'default' in config""" + config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "chat_template": "default"} + tokenizer = 
get_tokenizer(config) + + # Verify that the tokenizer's default template is used + formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) + expected = get_expected_llama_format(conversation_messages) + assert formatted == expected + + +def test_get_tokenizer_null_chat_template(conversation_messages): + """Test get_tokenizer when chat_template is None in config""" + config = {"name": "meta-llama/Llama-3.2-1B-Instruct", "chat_template": None} + tokenizer = get_tokenizer(config) + + # Verify that the passthrough template is used + formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) + + expected = "".join(msg["content"] for msg in conversation_messages) + + assert ( + formatted == expected + ) # Passthrough template should just return the last content + + +def test_get_tokenizer_custom_jinja_template(conversation_messages): + """Test get_tokenizer when a custom jinja template is specified""" + custom_template = COMMON_CHAT_TEMPLATES.simple_role_header + config = { + "name": "meta-llama/Llama-3.2-1B-Instruct", + "chat_template": custom_template, + } + tokenizer = get_tokenizer(config) + + # Verify that the custom template is used + formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) + expected = get_format_with_simple_role_header(conversation_messages) + assert formatted == expected # Should use our custom template From 956c7f08d8b71437a02f4832536424ba6292a258 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 11:14:32 -0700 Subject: [PATCH 10/23] copyright header Signed-off-by: ashors1 --- tests/unit/algorithms/test_utils.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/unit/algorithms/test_utils.py b/tests/unit/algorithms/test_utils.py index ce517a2b3a..c3cc381fe9 100755 --- a/tests/unit/algorithms/test_utils.py +++ b/tests/unit/algorithms/test_utils.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest from datetime import datetime from transformers import AutoTokenizer @@ -95,9 +109,7 @@ def test_get_tokenizer_null_chat_template(conversation_messages): expected = "".join(msg["content"] for msg in conversation_messages) - assert ( - formatted == expected - ) # Passthrough template should just return the last content + assert formatted == expected def test_get_tokenizer_custom_jinja_template(conversation_messages): @@ -112,4 +124,4 @@ def test_get_tokenizer_custom_jinja_template(conversation_messages): # Verify that the custom template is used formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) expected = get_format_with_simple_role_header(conversation_messages) - assert formatted == expected # Should use our custom template + assert formatted == expected From 4913f2589996c689607cf1686f3faf89235e07ff Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 12:44:10 -0700 Subject: [PATCH 11/23] address comments Signed-off-by: ashors1 --- examples/configs/sft.yaml | 2 + examples/run_sft.py | 4 +- nemo_reinforcer/algorithms/grpo.py | 3 + nemo_reinforcer/algorithms/sft.py | 3 + nemo_reinforcer/algorithms/utils.py | 2 + nemo_reinforcer/data/llm_message_utils.py | 26 +++--- nemo_reinforcer/models/policy/hf_policy.py | 4 +- nemo_reinforcer/utils/native_checkpoint.py | 13 ++- .../data/hf_datasets/test_prompt_response.py | 90 +++++++++++++++++++ 
.../test_squad.py} | 5 +- 10 files changed, 132 insertions(+), 20 deletions(-) create mode 100644 tests/unit/data/hf_datasets/test_prompt_response.py rename tests/unit/data/{test_hf_datasets.py => hf_datasets/test_squad.py} (80%) diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index 973c9fee78..dad99b2479 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -37,6 +37,8 @@ policy: data: max_input_seq_length: ${policy.max_total_sequence_length} dataset_name: "squad" + add_bos: true + add_eos: true logger: log_dir: "logs" # Base directory for all logs diff --git a/examples/run_sft.py b/examples/run_sft.py index 1b812b71a2..875aa9a000 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -120,8 +120,8 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): sft_task_spec, partial( sft_preprocessor, - add_bos=data_config.get("add_bos", True), - add_eos=data_config.get("add_eos", True), + add_bos=data_config["add_bos"], + add_eos=data_config["add_eos"], ), max_seq_length=data_config["max_input_seq_length"], ) diff --git a/nemo_reinforcer/algorithms/grpo.py b/nemo_reinforcer/algorithms/grpo.py index a89a29fa97..eb0947011f 100644 --- a/nemo_reinforcer/algorithms/grpo.py +++ b/nemo_reinforcer/algorithms/grpo.py @@ -629,6 +629,9 @@ def grpo_train( optimizer_path=os.path.join( checkpoint_path, "policy", "optimizer" ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), save_hf=is_last_checkpoint, ) torch.save( diff --git a/nemo_reinforcer/algorithms/sft.py b/nemo_reinforcer/algorithms/sft.py index 0bb42bd27a..45f4f08575 100644 --- a/nemo_reinforcer/algorithms/sft.py +++ b/nemo_reinforcer/algorithms/sft.py @@ -419,6 +419,9 @@ def sft_train( optimizer_path=os.path.join( checkpoint_path, "policy", "optimizer" ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), save_hf=is_last_checkpoint, ) torch.save( diff --git a/nemo_reinforcer/algorithms/utils.py 
b/nemo_reinforcer/algorithms/utils.py index 66d7c0e0c2..418f14c074 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_reinforcer/algorithms/utils.py @@ -157,6 +157,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: AutoTokenizer: The configured tokenizer instance Examples: + ```{doctest} >>> from transformers import AutoTokenizer >>> from nemo_reinforcer.models.policy import TokenizerConfig >>> # not specifying a chat template uses the tokenizer's default @@ -186,6 +187,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: >>> tokenizer = get_tokenizer(config) >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) >>> assert formatted == " START: You are a helpful AI assistant. END. START: Hello! END." + ``` """ tokenizer = AutoTokenizer.from_pretrained(tokenizer_config["name"]) if tokenizer.pad_token is None: diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index e7ba160c1c..c9290271bf 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -376,21 +376,23 @@ def get_formatted_message_log( message_chunk = formatted_message[prev_message_len_no_eos:] if i == 0: - if ( - add_bos_token - and tokenizer.bos_token is not None - and not message_chunk.startswith(tokenizer.bos_token) - ): - message_chunk = tokenizer.bos_token + message_chunk + if add_bos_token: + if tokenizer.bos_token is None: + logging.warning( + "add_bos_token is True but the tokenizer does not have a BOS token. Skipping BOS token addition." 
+ ) + elif not message_chunk.startswith(tokenizer.bos_token): + message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log) - 1: message_chunk = message_chunk.rstrip("\n") - if ( - add_eos_token - and tokenizer.eos_token is not None - and not message_chunk.endswith(tokenizer.eos_token) - ): - message_chunk += tokenizer.eos_token + if add_eos_token: + if tokenizer.eos_token is None: + logging.warning( + "add_eos_token is True but the tokenizer does not have an EOS token. Skipping EOS token addition." + ) + elif not message_chunk.endswith(tokenizer.eos_token): + message_chunk += tokenizer.eos_token new_message = message.copy() new_message["token_ids"] = tokenizer( diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index d7a0a50b0d..ab656f2f9e 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -825,6 +825,7 @@ def save_checkpoint( self, weights_path: str, optimizer_path: Optional[str] = None, + tokenizer_path: Optional[str] = None, save_torch_dist: bool = True, save_hf: bool = False, ): @@ -856,9 +857,10 @@ def save_checkpoint( optimizer=self.optimizer if optimizer_path else None, scheduler=self.scheduler if optimizer_path else None, optimizer_path=optimizer_path, + tokenizer=self.tokenizer, + tokenizer_path=tokenizer_path, save_torch_dist=save_torch_dist, save_hf=save_hf, - tokenizer=self.tokenizer if save_hf else None, ) def load_checkpoint(self, weights_path: str, optimizer_path: Optional[str] = None): diff --git a/nemo_reinforcer/utils/native_checkpoint.py b/nemo_reinforcer/utils/native_checkpoint.py index fbf13e8fb2..bf6ce0f81f 100644 --- a/nemo_reinforcer/utils/native_checkpoint.py +++ b/nemo_reinforcer/utils/native_checkpoint.py @@ -135,9 +135,10 @@ def save_checkpoint( optimizer: Optional[torch.optim.Optimizer] = None, scheduler: Optional[Any] = None, optimizer_path: Optional[str] = None, + tokenizer: Optional[Any] = None, + 
tokenizer_path: Optional[str] = None, save_torch_dist: bool = True, save_hf: bool = False, - tokenizer: Optional[Any] = None, ) -> None: """Save a checkpoint of the model and optionally optimizer state. @@ -162,9 +163,6 @@ def save_checkpoint( state_dict=model_state_dict, ) - if tokenizer is not None: - tokenizer.save_pretrained(hf_weights_path) - if save_torch_dist: model_state = {"model": ModelState(model)} dcp.save(model_state, checkpoint_id=weights_path) @@ -177,6 +175,13 @@ def save_checkpoint( optimizer_state = {"optim": OptimizerState(model, optimizer, scheduler)} dcp.save(optimizer_state, checkpoint_id=optimizer_path) + if tokenizer is not None: + if tokenizer_path is None: + raise ValueError( + "tokenizer_path must be provided when saving tokenizer state" + ) + tokenizer.save_pretrained(tokenizer_path) + def load_checkpoint( model, diff --git a/tests/unit/data/hf_datasets/test_prompt_response.py b/tests/unit/data/hf_datasets/test_prompt_response.py new file mode 100644 index 0000000000..748b41fd18 --- /dev/null +++ b/tests/unit/data/hf_datasets/test_prompt_response.py @@ -0,0 +1,90 @@ +import pytest +import tempfile +import json +from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_reinforcer.data.hf_datasets.prompt_response_dataset import ( + PromptResponseDataset, +) +from transformers import AutoTokenizer + + +@pytest.fixture +def sample_data(request): + input_key = request.param[0] + output_key = request.param[1] + + train_data = [ + {input_key: "Hello", output_key: "Hi there!"}, + {input_key: "How are you?", output_key: "I'm good, thanks!"}, + ] + val_data = [ + {input_key: "What's up?", output_key: "Not much!"}, + {input_key: "Bye", output_key: "Goodbye!"}, + ] + + # Create temporary files for train and validation data + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as train_file: + json.dump(train_data, train_file) + train_path = train_file.name + + with 
tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as val_file: + json.dump(val_data, val_file) + val_path = val_file.name + + return train_path, val_path + + +@pytest.mark.parametrize("sample_data", [("input", "output")], indirect=True) +def test_dataset_initialization(sample_data): + train_path, val_path = sample_data + dataset = PromptResponseDataset(train_path, val_path) + + assert dataset.input_key == "input" + assert dataset.output_key == "output" + assert "train" in dataset.formatted_ds + assert "validation" in dataset.formatted_ds + + +@pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True) +def test_custom_keys(sample_data): + train_path, val_path = sample_data + dataset = PromptResponseDataset( + train_path, val_path, input_key="question", output_key="answer" + ) + + assert dataset.input_key == "question" + assert dataset.output_key == "answer" + + +@pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True) +def test_message_formatting(sample_data): + train_path, val_path = sample_data + dataset = PromptResponseDataset( + train_path, val_path, input_key="question", output_key="answer" + ) + + first_example = dataset.formatted_ds["train"][0] + + assert first_example["messages"][0]["role"] == "user" + assert first_example["messages"][0]["content"] == "Hello" + assert first_example["messages"][1]["role"] == "assistant" + assert first_example["messages"][1]["content"] == "Hi there!" 
+ + chat_template = COMMON_CHAT_TEMPLATES.passthrough_prompt_response + tokenizer = AutoTokenizer.from_pretrained("Meta-Llama/Meta-Llama-3-8B-Instruct") + + combined_message = tokenizer.apply_chat_template( + first_example["messages"], + chat_template=chat_template, + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert combined_message == "".join( + message["content"] for message in first_example["messages"] + ) diff --git a/tests/unit/data/test_hf_datasets.py b/tests/unit/data/hf_datasets/test_squad.py similarity index 80% rename from tests/unit/data/test_hf_datasets.py rename to tests/unit/data/hf_datasets/test_squad.py index 5fe634a2a7..633a5e4c0c 100644 --- a/tests/unit/data/test_hf_datasets.py +++ b/tests/unit/data/hf_datasets/test_squad.py @@ -14,6 +14,7 @@ import pytest from transformers import AutoTokenizer +from nemo_reinforcer.data.hf_dataset.chat_templates import COMMON_CHAT_TEMPLATES from nemo_reinforcer.data.hf_datasets.squad import SquadDataset @@ -31,10 +32,12 @@ def test_squad_dataset(): assert example["messages"][1]["role"] == "user" assert example["messages"][2]["role"] == "assistant" + template = "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" + ## check that applying chat template works as expected default_templated = tokenizer.apply_chat_template( example["messages"], - chat_template=squad_dataset.task_spec.custom_template, + chat_template=template, tokenize=False, add_generation_prompt=False, add_special_tokens=False, From c016902e8ac1ee3e6320e0f8a7184181da111379 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 12:46:04 -0700 Subject: [PATCH 12/23] small fixes Signed-off-by: ashors1 --- nemo_reinforcer/models/policy/hf_policy.py | 1 - 
.../unit/data/hf_datasets/test_prompt_response.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index ab656f2f9e..d529212dcc 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -80,7 +80,6 @@ def __init__( rank = torch.distributed.get_rank() world_size = torch.distributed.get_world_size() model_name = self.cfg["model_name"] - tokenizer_name = self.cfg["tokenizer"]["name"] if self.cfg["precision"] == "float32": self.dtype = torch.float32 elif self.cfg["precision"] == "bfloat16": diff --git a/tests/unit/data/hf_datasets/test_prompt_response.py b/tests/unit/data/hf_datasets/test_prompt_response.py index 748b41fd18..12b98d7fb2 100644 --- a/tests/unit/data/hf_datasets/test_prompt_response.py +++ b/tests/unit/data/hf_datasets/test_prompt_response.py @@ -1,3 +1,17 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ import pytest import tempfile import json From 326b15116926a948311c8da7dc0add46d727909c Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 12:59:40 -0700 Subject: [PATCH 13/23] fix typo Signed-off-by: ashors1 --- tests/unit/data/hf_datasets/test_squad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/data/hf_datasets/test_squad.py b/tests/unit/data/hf_datasets/test_squad.py index 633a5e4c0c..47511363b5 100644 --- a/tests/unit/data/hf_datasets/test_squad.py +++ b/tests/unit/data/hf_datasets/test_squad.py @@ -14,7 +14,7 @@ import pytest from transformers import AutoTokenizer -from nemo_reinforcer.data.hf_dataset.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_reinforcer.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES from nemo_reinforcer.data.hf_datasets.squad import SquadDataset From 506848752f9f19ec22d3deaa422d6efd936c8d88 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Fri, 11 Apr 2025 13:59:41 -0700 Subject: [PATCH 14/23] fix tests Signed-off-by: ashors1 --- nemo_reinforcer/data/llm_message_utils.py | 5 +++-- nemo_reinforcer/models/policy/hf_policy.py | 2 ++ tests/unit/utils/test_native_checkpoint.py | 4 ++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/nemo_reinforcer/data/llm_message_utils.py b/nemo_reinforcer/data/llm_message_utils.py index c9290271bf..13fc7dbd30 100644 --- a/nemo_reinforcer/data/llm_message_utils.py +++ b/nemo_reinforcer/data/llm_message_utils.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings from typing import Dict, List import torch @@ -378,7 +379,7 @@ def get_formatted_message_log( if i == 0: if add_bos_token: if tokenizer.bos_token is None: - logging.warning( + warnings.warn( "add_bos_token is True but the tokenizer does not have a BOS token. Skipping BOS token addition." 
) elif not message_chunk.startswith(tokenizer.bos_token): @@ -388,7 +389,7 @@ def get_formatted_message_log( message_chunk = message_chunk.rstrip("\n") if add_eos_token: if tokenizer.eos_token is None: - logging.warning( + warnings.warn( "add_eos_token is True but the tokenizer does not have an EOS token. Skipping EOS token addition." ) elif not message_chunk.endswith(tokenizer.eos_token): diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index d529212dcc..4166d6df77 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -1091,6 +1091,7 @@ def save_checkpoint( self, weights_path: str, optimizer_path: Optional[str] = None, + tokenizer_path: Optional[str] = None, save_torch_dist: bool = True, save_hf: bool = False, ): @@ -1099,6 +1100,7 @@ def save_checkpoint( "save_checkpoint", weights_path, optimizer_path, + tokenizer_path, save_torch_dist, save_hf, respect_tied_workers=True, diff --git a/tests/unit/utils/test_native_checkpoint.py b/tests/unit/utils/test_native_checkpoint.py index aa913f8a97..4d30ef66ec 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -259,6 +259,7 @@ def test_save_and_load_hf_checkpoint(policy): os.path.join(tmp_dir, "test_hf_and_dcp"), save_hf=True, save_torch_dist=True, + tokenizer_path=os.path.join(tmp_dir, "test_hf_and_dcp_tokenizer"), ) ## make sure we save both HF and DCP checkpoints @@ -274,6 +275,9 @@ def test_save_and_load_hf_checkpoint(policy): "model-00001-of-00002.safetensors", "model-00002-of-00002.safetensors", "model.safetensors.index.json", + } + + assert set(os.listdir(os.path.join(tmp_dir, "test_hf_and_dcp_tokenizer"))) == { "tokenizer_config.json", "tokenizer.json", "special_tokens_map.json", From 8e835ba86bcf768ccf9d741e503dca6ad2784ad1 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Mon, 14 Apr 2025 21:35:39 -0700 Subject: [PATCH 15/23] update chat template 
documentation Signed-off-by: ashors1 --- docs/guides/sft.md | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/guides/sft.md b/docs/guides/sft.md index 4d452b109d..fe49b71734 100644 --- a/docs/guides/sft.md +++ b/docs/guides/sft.md @@ -27,7 +27,7 @@ uv run examples/run_sft.py \ SFT datasets in Reinforcer are encapsulated using classes. Each SFT data class is expected to have the following attributes: 1. `formatted_ds`: The dictionary of formatted datasets. This dictionary should contain `train` and `validation` splits, and each split should conform to the format described below. - 2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset as well as the `custom_template` for this dataset. More on custom templates below. + 2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. SFT datasets are expected to follow the HuggingFace chat format. Refer to the [chat dataset document](../design_docs/chat_datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [data/hf_datasets/squad.py](../../nemo_reinforcer/data/hf_datasets/squad.py) has an example: @@ -51,17 +51,21 @@ def format_squad(data): } ``` -Reinforcer SFT uses HuggingFace chat templates to format the individual examples. If you would like to use a custom template, create a string template in [jinja format](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template) and pass it to the dataset's `TaskDataSpec`. For example, +Reinforcer SFT uses HuggingFace chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example): + +1. Apply the tokenizer's default chat template. 
To use the tokenizer's default, either omit `tokenizer.chat_template` from the config altogether, or set `tokenizer.chat_template="default"`.
+2. Use a "passthrough" template which simply concatenates all messages. This is desirable if the chat template has been applied to your dataset as an offline preprocessing step. In this case, you should set `tokenizer.chat_template` to None as follows:
+   ```yaml
+   tokenizer:
+     chat_template: NULL
+   ```
+3. Use a custom template: If you would like to use a custom template, create a string template in [jinja format](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template), and add that string to the config. For example,
+
+   ```yaml
+   tokenizer:
+     chat_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer: '}}{%- elif message['role'] == 'assistant' %}{{message['content'].strip()}}{%- endif %}{% endfor %}"
+   ```
-
-```python
-custom_template = (
-    "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer: '}}{%- elif message['role'] == 'assistant' %}{{message['content'].strip()}}{%- endif %}{% endfor %}"
-)
-task_spec = TaskDataSpec(
-    task_name="squad",
-    custom_template=custom_template,
-)
-
 By default, NeMo-Reinforcer has support for `Squad` and `OpenAssistant` datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
From d95d88b6cf65ae97ec7c518578bcbdbf4c83802f Mon Sep 17 00:00:00 2001 From: ashors1 Date: Tue, 15 Apr 2025 11:32:54 -0700 Subject: [PATCH 16/23] fix unit tests Signed-off-by: ashors1 --- tests/unit/models/generation/test_vllm_generation.py | 2 +- tests/unit/models/policy/test_hf_ray_policy.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 20607c6a65..23b0ca356d 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -622,7 +622,7 @@ def test_vllm_generation_with_stop(cluster, test_input_data, tokenizer, is_eval) print("Creating HF policy...") hf_config = basic_hf_test_config.copy() - hf_policy = HfPolicy(cluster, hf_config) + hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") ipc_handles = hf_policy.get_weights_ipc_handles() diff --git a/tests/unit/models/policy/test_hf_ray_policy.py b/tests/unit/models/policy/test_hf_ray_policy.py index e24238ac05..f56d3b8df3 100644 --- a/tests/unit/models/policy/test_hf_ray_policy.py +++ b/tests/unit/models/policy/test_hf_ray_policy.py @@ -591,7 +591,7 @@ def test_hf_policy_generation_with_stop(test_input_data, tokenizer): ) # Create policy - policy = HfPolicy(cluster=cluster, config=config) + policy = HfPolicy(cluster=cluster, config=config, tokenizer=tokenizer) # Call prepare_for_generation if available print("Preparing for generation...") From 1958bb99daa3c97993a8e35e2b76b16520682b40 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Tue, 15 Apr 2025 15:05:10 -0700 Subject: [PATCH 17/23] fix doctest Signed-off-by: ashors1 --- nemo_reinforcer/algorithms/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_reinforcer/algorithms/utils.py index 418f14c074..6f814b3597 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ 
b/nemo_reinforcer/algorithms/utils.py @@ -159,7 +159,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: Examples: ```{doctest} >>> from transformers import AutoTokenizer - >>> from nemo_reinforcer.models.policy import TokenizerConfig + >>> from nemo_reinforcer.algorithms.utils import get_tokenizer >>> # not specifying a chat template uses the tokenizer's default >>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct"} >>> tokenizer = get_tokenizer(config) From 2c7c5c575252417e95bff8f66d026c32955a94c0 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Tue, 15 Apr 2025 19:11:49 -0700 Subject: [PATCH 18/23] fix checkpoint save when tokenizer not provided Signed-off-by: ashors1 --- nemo_reinforcer/models/policy/hf_policy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_reinforcer/models/policy/hf_policy.py b/nemo_reinforcer/models/policy/hf_policy.py index 2585396abe..0141a382ef 100644 --- a/nemo_reinforcer/models/policy/hf_policy.py +++ b/nemo_reinforcer/models/policy/hf_policy.py @@ -858,7 +858,7 @@ def save_checkpoint( optimizer=self.optimizer if optimizer_path else None, scheduler=self.scheduler if optimizer_path else None, optimizer_path=optimizer_path, - tokenizer=self.tokenizer, + tokenizer=self.tokenizer if tokenizer_path else None, tokenizer_path=tokenizer_path, save_torch_dist=save_torch_dist, save_hf=save_hf, From 1df9a9f951d4b4210a264a1251cb02fb719a25b7 Mon Sep 17 00:00:00 2001 From: Parth Chadha Date: Tue, 15 Apr 2025 15:42:01 -0700 Subject: [PATCH 19/23] feat: add a unique seed for each vllm llm engine (#171) Signed-off-by: Parth Chadha --- nemo_reinforcer/distributed/worker_groups.py | 6 +- nemo_reinforcer/models/generation/vllm.py | 27 +++- .../models/generation/test_vllm_generation.py | 142 ++++++++++++++++++ 3 files changed, 167 insertions(+), 8 deletions(-) diff --git a/nemo_reinforcer/distributed/worker_groups.py b/nemo_reinforcer/distributed/worker_groups.py index d4ec9d7f1a..4e3bbbf2a6 100644 --- 
a/nemo_reinforcer/distributed/worker_groups.py +++ b/nemo_reinforcer/distributed/worker_groups.py @@ -91,7 +91,7 @@ def __call__( placement_group: PlacementGroup, placement_group_bundle_index: int, num_gpus: int, - bundle_indices: Optional[list] = None, + bundle_indices: Optional[tuple] = None, **extra_options: Dict[str, Any], ): """Create a Ray worker with the specified configuration. @@ -108,7 +108,7 @@ def __call__( placement_group: Ray placement group for resource allocation placement_group_bundle_index: Index of the bundle in the placement group num_gpus: Number of GPUs to allocate to this worker - bundle_indices: List of bundle indices for tensor parallelism (if applicable) + bundle_indices: Tuple of (node_idx, local_bundle_indices) for tensor parallelism (if applicable) extra_options: Additional options to pass to the Ray actor (may be overridden by actor's configure_worker(...) method) Returns: @@ -300,7 +300,7 @@ def _create_workers_from_bundle_indices( # For tensor parallel groups, only the first worker gets bundle_indices worker_bundle_indices = ( - local_bundle_indices if local_rank == 0 else None + (node_idx, local_bundle_indices) if local_rank == 0 else None ) # Create a descriptive name based on group structure diff --git a/nemo_reinforcer/models/generation/vllm.py b/nemo_reinforcer/models/generation/vllm.py index c9676a2f2b..4e8ff364c7 100644 --- a/nemo_reinforcer/models/generation/vllm.py +++ b/nemo_reinforcer/models/generation/vllm.py @@ -61,7 +61,7 @@ def __repr__(self): @staticmethod def configure_worker( - num_gpus: int | float, bundle_indices: Optional[list] = None + num_gpus: int | float, bundle_indices: Optional[tuple] = None ) -> tuple[dict, dict, dict]: """Provides complete worker configuration for vLLM tensor parallelism. 
@@ -70,7 +70,7 @@ def configure_worker( Args: num_gpus: Original GPU allocation for this worker based on the placement group - bundle_indices: Bundle indices for tensor parallelism (if applicable) + bundle_indices: Tuple of (node_idx, local_bundle_indices) for tensor parallelism (if applicable) Returns: tuple with complete worker configuration: @@ -83,11 +83,26 @@ def configure_worker( init_kwargs = {} env_vars = {} - init_kwargs["bundle_indices"] = bundle_indices + local_bundle_indices = None + if bundle_indices is not None: + node_idx = bundle_indices[0] + local_bundle_indices = bundle_indices[1] + init_kwargs["bundle_indices"] = local_bundle_indices + + """ + compute a unique seed from the node_idx and bundle_indices: + node_idx = 0, bundle_indices = [0, 1, 2, 3] -> seed = 0*1024 + 0 + node_idx = 0, bundle_indices = [4, 5, 6, 7] -> seed = 0*1024 + 1 + node_idx = 1, bundle_indices = [0, 1, 2, 3] -> seed = 1*1024 + 0 + node_idx = 1, bundle_indices = [4, 5, 6, 7] -> seed = 1*1024 + 1 + """ + bundle_id = local_bundle_indices[0] // len(local_bundle_indices) + seed = node_idx * 1024 + bundle_id + init_kwargs["seed"] = seed is_part_of_tp_workers = ( - bundle_indices is not None and len(bundle_indices) > 1 - ) or bundle_indices is None + local_bundle_indices is not None and len(local_bundle_indices) > 1 + ) or local_bundle_indices is None if is_part_of_tp_workers: # Ray + vllm likes to manage GPU assignment internally resources["num_gpus"] = 0 @@ -104,6 +119,7 @@ def __init__( config: VllmConfig, bundle_indices: Optional[list] = None, fraction_of_gpus: float = 1.0, + seed: Optional[int] = None, ): """Initialize a vLLM worker for distributed inference. 
@@ -177,6 +193,7 @@ def __init__( gpu_memory_utilization=self.cfg["vllm_cfg"]["gpu_memory_utilization"], enable_prefix_caching=True, dtype="auto", + seed=seed, # Don't use cuda-graph by default as it leads to convergence issue (see https://github.com/NVIDIA/reinforcer/issues/186) enforce_eager=True, max_model_len=self.cfg["vllm_cfg"]["max_model_len"], diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 23b0ca356d..62294ef46e 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -208,6 +208,148 @@ def test_vllm_policy_generation(policy, test_input_data, tokenizer): ) +def test_vllm_worker_seed_behavior(cluster, tokenizer): + """ + 1. Different workers generate different outputs for identical prompts due to different seeds + 2. When forced to use the same seed, workers generate identical outputs + """ + from nemo_reinforcer.models.generation.vllm import VllmGenerationWorker + + unique_prompts = [ + "Hello, my name is", + "The capital of France is", + ] + + # Create a batch where each prompt appears twice + # When sharded, different workers will get the same prompt + duplicated_prompts = unique_prompts + unique_prompts + + # Tokenize prompts + encodings = tokenizer( + duplicated_prompts, + padding="max_length", + max_length=20, + truncation=True, + return_tensors="pt", + padding_side="right", + ) + + input_lengths = encodings["attention_mask"].sum(dim=1).to(torch.int32) + + # Create input data dictionary + duplicated_batch = BatchedDataDict( + { + "input_ids": encodings["input_ids"], + "input_lengths": input_lengths, + } + ) + + # Part 1: Test that different workers generate different outputs due to different seeds + print("Creating vLLM policy with default seed behavior...") + vllm_config = basic_vllm_test_config.copy() + vllm_config = configure_generation_config(vllm_config, tokenizer) + policy = VllmGeneration(cluster, 
vllm_config) + policy.finish_generation() + + from nemo_reinforcer.models.policy.hf_policy import HfPolicy + + hf_config = basic_hf_test_config.copy() + hf_policy = HfPolicy(cluster, hf_config) + + print(f"refitting vllm policy...") + ipc_handles = hf_policy.get_weights_ipc_handles() + policy.prepare_for_generation() + policy.update_weights(ipc_handles) + + try: + # Generate with duplicated prompts + print("Running generation with duplicated prompts...") + outputs = policy.generate(duplicated_batch, greedy=False) + + # Decode the generated sequences + gen_texts = tokenizer.batch_decode( + outputs["output_ids"], skip_special_tokens=True + ) + + print(f"Generated texts with duplicated prompts: {gen_texts}") + + # Check if the duplicated prompts generated different texts + # The first half and second half should be different due to different worker seeds + first_half = gen_texts[: len(unique_prompts)] + second_half = gen_texts[len(unique_prompts) :] + + print(f"First worker outputs: {first_half}") + print(f"Second worker outputs: {second_half}") + + # At least one of the pairs should be different due to different seeds + assert first_half != second_half, ( + "Different workers should generate different outputs for identical prompts due to different seeds" + ) + + # Clean up before the second test + policy.shutdown() + + # Part 2: Test with fixed seed to verify identical outputs + print("\nNow testing with fixed seed...") + + # Store the original configure_worker method + original_configure_worker = VllmGenerationWorker.configure_worker + + # Override the configure_worker method to always use the same seed + def configure_worker_fixed_seed(num_gpus, bundle_indices=None): + resources, env_vars, init_kwargs = original_configure_worker( + num_gpus, bundle_indices + ) + # Override with fixed seed + init_kwargs["seed"] = 42 + return resources, env_vars, init_kwargs + + VllmGenerationWorker.configure_worker = configure_worker_fixed_seed + + # Create a new policy with fixed 
seed + fixed_seed_policy = VllmGeneration(cluster, vllm_config) + + # Generate with the same duplicated prompts + print("Running generation with fixed seed...") + fixed_seed_outputs = fixed_seed_policy.generate(duplicated_batch, greedy=False) + + # Decode the generated sequences + fixed_seed_gen_texts = tokenizer.batch_decode( + fixed_seed_outputs["output_ids"], skip_special_tokens=True + ) + + print(f"Generated texts with fixed seed: {fixed_seed_gen_texts}") + + # Check if the duplicated prompts now generate the same texts + fixed_seed_first_half = fixed_seed_gen_texts[: len(unique_prompts)] + fixed_seed_second_half = fixed_seed_gen_texts[len(unique_prompts) :] + + print(f"First worker outputs (fixed seed): {fixed_seed_first_half}") + print(f"Second worker outputs (fixed seed): {fixed_seed_second_half}") + + # With the same seed, outputs should be identical + assert fixed_seed_first_half == fixed_seed_second_half, ( + "Workers with the same fixed seed should generate identical outputs for identical prompts" + ) + + finally: + # Restore the original method if we patched it + if "original_configure_worker" in locals(): + VllmGenerationWorker.configure_worker = original_configure_worker + + # Clean up resources + if "policy" in locals() and hasattr(policy, "shutdown"): + policy.shutdown() + if "fixed_seed_policy" in locals() and hasattr(fixed_seed_policy, "shutdown"): + fixed_seed_policy.shutdown() + + # Force garbage collection + import gc + + gc.collect() + torch.cuda.empty_cache() + + @pytest.mark.timeout(140) def test_vllm_generation_with_hf_training(cluster, tokenizer): """1. 
Use vLLM for generation From f35ad95af26b91ba38d5dca884c1229e64ddd1ca Mon Sep 17 00:00:00 2001 From: Terry Kong Date: Tue, 15 Apr 2025 16:00:04 -0700 Subject: [PATCH 20/23] fix: unit test script halts on first failure (#189) Signed-off-by: Terry Kong --- pyproject.toml | 1 + tests/run_unit.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b257b7aed5..febbf9c5f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,6 +79,7 @@ exclude = ''' ''' [tool.pytest.ini_options] +addopts = "--cov=nemo_reinforcer --cov-report=term --cov-report=json -s -rA -x" testpaths = ["tests"] python_files = "test_*.py" diff --git a/tests/run_unit.sh b/tests/run_unit.sh index 093583faf9..5367749199 100755 --- a/tests/run_unit.sh +++ b/tests/run_unit.sh @@ -32,7 +32,7 @@ export PYTHONPATH=$(realpath ${SCRIPT_DIR}/..):${PYTHONPATH:-} # Run unit tests echo "Running unit tests..." -if ! pytest unit/ --cov=nemo_reinforcer --cov-report=term --cov-report=json -s -rA "$@"; then +if ! pytest unit/ "$@"; then echo "[ERROR]: Unit tests failed." exit 1 fi From 844e47015bb30a409ce6ea4b43ced8d6f6578471 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Tue, 15 Apr 2025 19:52:56 -0700 Subject: [PATCH 21/23] fix new vllm test and doctest Signed-off-by: ashors1 --- nemo_reinforcer/algorithms/utils.py | 3 +++ tests/unit/models/generation/test_vllm_generation.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_reinforcer/algorithms/utils.py index 6f814b3597..520625cb79 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_reinforcer/algorithms/utils.py @@ -168,6 +168,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: ... {"role": "user", "content": "Hello!"} ... 
] >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + "No chat template provided, using tokenizer's default" >>> assert formatted == AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct").apply_chat_template(messages, tokenize=False) >>> # Using a passthrough template @@ -177,6 +178,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: ... } >>> tokenizer = get_tokenizer(config) >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + "Using passthrough chat template" >>> assert formatted == "".join(msg["content"] for msg in messages) >>> # Using a custom template @@ -186,6 +188,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: ... } >>> tokenizer = get_tokenizer(config) >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) + "Using custom chat template" >>> assert formatted == " START: You are a helpful AI assistant. END. START: Hello! END." ``` """ diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 62294ef46e..d86f4983cc 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -254,7 +254,7 @@ def test_vllm_worker_seed_behavior(cluster, tokenizer): from nemo_reinforcer.models.policy.hf_policy import HfPolicy hf_config = basic_hf_test_config.copy() - hf_policy = HfPolicy(cluster, hf_config) + hf_policy = HfPolicy(cluster, hf_config, tokenizer) print(f"refitting vllm policy...") ipc_handles = hf_policy.get_weights_ipc_handles() From ac4b6eac61456f6f9d9fd73b34c8491c51f8fec9 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Wed, 16 Apr 2025 09:04:19 -0700 Subject: [PATCH 22/23] remove old comment Signed-off-by: ashors1 --- nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py 
b/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py index c9ff14d348..928af4fdff 100644 --- a/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py +++ b/nemo_reinforcer/data/hf_datasets/prompt_response_dataset.py @@ -33,7 +33,6 @@ def __init__( formatted_train_dataset = train_original_dataset.map(self.add_messages_key) formatted_val_dataset = val_original_dataset.map(self.add_messages_key) - ## just duplicating the dataset for train and validation for simplicity self.formatted_ds = { "train": formatted_train_dataset, "validation": formatted_val_dataset, From c5328f0d80a2874e38026f35d4f257ef8aed5459 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Wed, 16 Apr 2025 10:11:28 -0700 Subject: [PATCH 23/23] fix doctest Signed-off-by: ashors1 --- nemo_reinforcer/algorithms/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nemo_reinforcer/algorithms/utils.py b/nemo_reinforcer/algorithms/utils.py index 520625cb79..3cfd8569a9 100644 --- a/nemo_reinforcer/algorithms/utils.py +++ b/nemo_reinforcer/algorithms/utils.py @@ -163,12 +163,12 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: >>> # not specifying a chat template uses the tokenizer's default >>> config = {"name": "meta-llama/Llama-3.2-1B-Instruct"} >>> tokenizer = get_tokenizer(config) + No chat template provided, using tokenizer's default >>> messages = [ ... {"role": "system", "content": "You are a helpful AI assistant."}, ... {"role": "user", "content": "Hello!"} ... ] >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) - "No chat template provided, using tokenizer's default" >>> assert formatted == AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct").apply_chat_template(messages, tokenize=False) >>> # Using a passthrough template @@ -177,8 +177,8 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: ... "chat_template": None ... 
} >>> tokenizer = get_tokenizer(config) + Using passthrough chat template >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) - "Using passthrough chat template" >>> assert formatted == "".join(msg["content"] for msg in messages) >>> # Using a custom template @@ -187,8 +187,8 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> AutoTokenizer: ... "chat_template": "{% for message in messages %}{{ ' START: ' + message['content'] + ' END.' }}{% endfor %}" ... } >>> tokenizer = get_tokenizer(config) + Using custom chat template >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) - "Using custom chat template" >>> assert formatted == " START: You are a helpful AI assistant. END. START: Hello! END." ``` """