Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
122b65d
Write out label_mask instead of labels in olmocore tokenization script
tyler-romero Jul 1, 2025
671f186
.
tyler-romero Jul 1, 2025
af5dadd
.
tyler-romero Jul 1, 2025
a45a88c
Better tqdm
tyler-romero Jul 1, 2025
5fdde57
less frequent updates tqdm
tyler-romero Jul 1, 2025
6db1ad4
debug statement
Jul 2, 2025
454d690
add tylertest chat template
tyler-romero Jul 2, 2025
e9c0902
Update dataset_transformation.py
jacob-morrison Jul 2, 2025
f42cbc1
Update dataset_transformation.py
jacob-morrison Jul 2, 2025
e0c735d
reorder
tyler-romero Jul 2, 2025
98fae2e
fix
tyler-romero Jul 2, 2025
ab67344
save too
tyler-romero Jul 2, 2025
0520b62
auto add chat template
tyler-romero Jul 2, 2025
56f647d
shuffle before write
tyler-romero Jul 2, 2025
5287676
Update dataset_transformation.py
jacob-morrison Jul 2, 2025
39b7265
Fix generation_config
tyler-romero Jul 4, 2025
fc009f1
test
jacob-morrison Jul 6, 2025
0521be8
add olmo toolu chat template
jacob-morrison Jul 7, 2025
e61fc56
add chat template
jacob-morrison Jul 7, 2025
5e66013
turn off generation config
jacob-morrison Jul 7, 2025
1b6e1d6
new chat templates
jacob-morrison Jul 9, 2025
1e89733
fix
jacob-morrison Jul 10, 2025
2a528b5
Merge branch 'main' into tyler/olmocore-tokenization-bug-fix-label-mask
jacob-morrison Jul 15, 2025
b012b93
use my workspace
jacob-morrison Jul 15, 2025
4deb643
Merge branch 'olmo3-chat-templates' into tyler/olmocore-tokenization-…
jacob-morrison Jul 16, 2025
a959ec9
Merge branch 'main' into tyler/olmocore-tokenization-bug-fix-label-mask
jacob-morrison Jul 16, 2025
bd02fc0
test
jacob-morrison Jul 16, 2025
27cb495
test
jacob-morrison Jul 16, 2025
de1424d
update
jacob-morrison Jul 16, 2025
1696897
update
jacob-morrison Jul 16, 2025
83ea35d
update doc
jacob-morrison Jul 16, 2025
797c54d
delete
jacob-morrison Jul 16, 2025
728710f
reset
jacob-morrison Jul 16, 2025
f7c2371
update
jacob-morrison Jul 16, 2025
f913633
update
jacob-morrison Jul 16, 2025
1266096
update
jacob-morrison Jul 16, 2025
065da6a
remove comment
jacob-morrison Jul 16, 2025
ac04d2d
update
jacob-morrison Jul 16, 2025
f77a53a
Also write metadata files to support GCS
tyler-romero Jul 16, 2025
3231305
data mixing qol fixes
jacob-morrison Jul 21, 2025
c37e26a
fix style + update saving logic for ppo/grpo
jacob-morrison Jul 21, 2025
ca54316
Merge branch 'tyler/olmocore-tokenization-bug-fix-label-mask' of http…
jacob-morrison Jul 21, 2025
ae33877
add logging
jacob-morrison Jul 21, 2025
ba0f05f
update sampling
jacob-morrison Jul 21, 2025
a980e4e
Merge branch 'main' into tyler/olmocore-tokenization-bug-fix-label-mask
jacob-morrison Jul 21, 2025
695acb4
Merge branch 'olmo3-chat-templates' into tyler/olmocore-tokenization-…
jacob-morrison Jul 21, 2025
550764c
update oe-eval script
jacob-morrison Jul 21, 2025
2f5baf5
Merge branch 'tyler/olmocore-tokenization-bug-fix-label-mask' of http…
jacob-morrison Jul 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
292 changes: 276 additions & 16 deletions open_instruct/dataset_transformation.py

Large diffs are not rendered by default.

42 changes: 36 additions & 6 deletions open_instruct/grpo_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
from rich.pretty import pprint
from torch.utils.tensorboard import SummaryWriter
from transformers import AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizer, get_scheduler
from transformers import AutoModelForCausalLM, GenerationConfig, PreTrainedModel, PreTrainedTokenizer, get_scheduler
from transformers.integrations import HfDeepSpeedConfig
from vllm import SamplingParams

Expand Down Expand Up @@ -983,8 +983,19 @@ def save_checkpoint_state(self, checkpoint_state_dir: str, client_state: Dict[st
checkpoint_state_dir, args.gs_checkpoint_state_dir
)

def save_model(self, output_dir: str) -> None:
def save_model(self, output_dir: str, chat_template_name: str, tokenizer: PreTrainedTokenizer) -> None:
model_to_save = self.model
if "olmo" in chat_template_name:
# New chat template has no bos token, and two eos tokens: <|im_end|> and <|endoftext|>
model_to_save.generation_config = GenerationConfig(
temperature=None,
top_p=None,
eos_token_id=[
tokenizer.convert_tokens_to_ids("<|im_end|>"),
tokenizer.convert_tokens_to_ids("<|endoftext|>"),
],
)

if self.rank == 0:
os.makedirs(output_dir, exist_ok=True)

Expand Down Expand Up @@ -1774,6 +1785,7 @@ def one_training_step(
train_dataset,
writer,
wandb_url,
chat_template_name,
):
"""Train the model for one step."""
update_ref_policy_future = []
Expand Down Expand Up @@ -1820,7 +1832,12 @@ def one_training_step(
checkpoint_dir = f"{args.output_dir}_checkpoints"
step_dir = os.path.join(checkpoint_dir, f"step_{training_step}")
logger.info(f"Saving model at step {training_step} to {step_dir}")
ray.get([policy_group.models[i].save_model.remote(step_dir) for i in range(args.world_size)])
ray.get(
[
policy_group.models[i].save_model.remote(step_dir, chat_template_name, tokenizer)
for i in range(args.world_size)
]
)
if args.try_launch_beaker_eval_jobs_on_weka and is_beaker_job():
leaderboard_name = f"{args.hf_repo_revision}_step_{training_step}"
for i in range(args.world_size):
Expand Down Expand Up @@ -1917,11 +1934,23 @@ def maybe_evaluate(
logger.warning("[Main Thread] 🙈 Evaluation responses not received")


def save_final_model(args: Args, policy_group: ModelGroup, training_step: int, wandb_url: str):
def save_final_model(
args: Args,
policy_group: ModelGroup,
tokenizer: PreTrainedTokenizer,
training_step: int,
wandb_url: str,
chat_template_name: str,
):
"""Save the final model and launch evaluation jobs if configured."""
logger.info(f"Saving final model at step {training_step} to {args.output_dir}")
with Timer("[Main Thread] 🗡️ Saving model"):
ray.get([policy_group.models[i].save_model.remote(args.output_dir) for i in range(args.world_size)])
ray.get(
[
policy_group.models[i].save_model.remote(args.output_dir, chat_template_name, tokenizer)
for i in range(args.world_size)
]
)
if args.try_launch_beaker_eval_jobs_on_weka and is_beaker_job():
leaderboard_name = args.hf_repo_revision
for i in range(args.world_size):
Expand Down Expand Up @@ -2189,6 +2218,7 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, num_eval_sa
train_dataset,
writer,
wandb_url,
tc.chat_template_name,
)

maybe_evaluate(
Expand All @@ -2204,7 +2234,7 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, num_eval_sa
writer,
)

save_final_model(args, policy_group, training_step, wandb_url)
save_final_model(args, policy_group, tokenizer, training_step, wandb_url, tc.chat_template_name)

except Exception as e:
logger.error(f"Training error occurred: {str(e)}\n{traceback.format_exc()}")
Expand Down
21 changes: 18 additions & 3 deletions open_instruct/grpo_vllm_thread_ray_gtrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
from transformers import (
AutoModelForCausalLM,
AutoModelForSequenceClassification,
GenerationConfig,
PreTrainedModel,
PreTrainedTokenizer,
get_scheduler,
Expand Down Expand Up @@ -791,6 +792,7 @@ def train(
train_dataset: Dataset,
eval_dataset: Dataset,
tokenizer: PreTrainedTokenizer,
tc: TokenizerConfig,
vllm_engines: List[ray.actor.ActorHandle],
metrics_queue: RayQueue,
data_collator: Callable,
Expand Down Expand Up @@ -1378,7 +1380,7 @@ def generate_with_engines(prompts: List[List[int]], sampling_params: SamplingPar
checkpoint_dir = f"{args.output_dir}_checkpoints"
step_dir = os.path.join(checkpoint_dir, f"step_{training_step}")
print(f"Saving model at step {training_step} to {step_dir}")
self.save_model(self.model, step_dir)
self.save_model(self.model, tc.chat_template_name, tokenizer, step_dir)
if args.try_launch_beaker_eval_jobs_on_weka:
leaderboard_name = f"{args.hf_repo_revision}_step_{training_step}"
if self.rank == 0 and is_beaker_job():
Expand All @@ -1404,7 +1406,7 @@ def generate_with_engines(prompts: List[List[int]], sampling_params: SamplingPar
print(f"Eval future {eval_futures[0]} is done")
eval_futures.popleft()
print(f"Saving final model at step {training_step} to {args.output_dir}")
self.save_model(self.model, args.output_dir)
self.save_model(self.model, tc.chat_template_name, tokenizer, args.output_dir)
if args.try_launch_beaker_eval_jobs_on_weka:
leaderboard_name = args.hf_repo_revision
if self.rank == 0 and is_beaker_job():
Expand Down Expand Up @@ -1438,14 +1440,27 @@ def generate_with_engines(prompts: List[List[int]], sampling_params: SamplingPar
shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
print("finished training")

def save_model(self, model_to_save: PreTrainedModel, output_dir: str) -> None:
def save_model(
self, model_to_save: PreTrainedModel, chat_template_name: str, tokenizer: PreTrainedTokenizer, output_dir: str
) -> None:
if self.rank == 0:
os.makedirs(output_dir, exist_ok=True)

# save model weights for ZeRO2/3
if hasattr(model_to_save, "module"):
model_to_save = model_to_save.module

if "olmo" in chat_template_name:
# New chat template has no bos token, and two eos tokens: <|im_end|> and <|endoftext|>
model_to_save.generation_config = GenerationConfig(
temperature=None,
top_p=None,
eos_token_id=[
tokenizer.convert_tokens_to_ids("<|im_end|>"),
tokenizer.convert_tokens_to_ids("<|endoftext|>"),
],
)

# gather parameters
output_state_dict = {}
for k, v in model_to_save.named_parameters():
Expand Down
1 change: 1 addition & 0 deletions open_instruct/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ def save_with_accelerate(
# otherwise, we get an error thrown at save time.
if "olmo" in chat_template_name:
# New chat template has no bos token, and two eos tokens: <|im_end|> and <|endoftext|>
logger.log(f"Detected olmo chat template: {chat_template_name}, updating model generation config.")
model.generation_config = transformers.GenerationConfig(
temperature=None,
top_p=None,
Expand Down
28 changes: 25 additions & 3 deletions open_instruct/ppo_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
AutoConfig,
AutoModelForCausalLM,
AutoModelForSequenceClassification,
GenerationConfig,
PreTrainedModel,
PreTrainedTokenizer,
get_scheduler,
Expand Down Expand Up @@ -1074,7 +1075,7 @@ def train(
self.offload_to_cpu(self.model)
return metrics_list

def save_model(self, output_dir: str) -> None:
def save_model(self, output_dir: str, chat_template_name: str, tokenizer: PreTrainedTokenizer) -> None:
model_to_save = self.model
if self.rank == 0:
os.makedirs(output_dir, exist_ok=True)
Expand All @@ -1083,6 +1084,17 @@ def save_model(self, output_dir: str) -> None:
if hasattr(model_to_save, "module"):
model_to_save = model_to_save.module

if "olmo" in chat_template_name:
# New chat template has no bos token, and two eos tokens: <|im_end|> and <|endoftext|>
model_to_save.generation_config = GenerationConfig(
temperature=None,
top_p=None,
eos_token_id=[
tokenizer.convert_tokens_to_ids("<|im_end|>"),
tokenizer.convert_tokens_to_ids("<|endoftext|>"),
],
)

# gather parameters
output_state_dict = {}
for k, v in model_to_save.named_parameters():
Expand Down Expand Up @@ -1819,7 +1831,12 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, reward_fn:
checkpoint_dir = f"{args.output_dir}_checkpoints"
step_dir = os.path.join(checkpoint_dir, f"step_{training_step}")
print(f"Saving model at step {training_step} to {step_dir}")
ray.get([policy_group.models[i].save_model.remote(step_dir) for i in range(args.world_size)])
ray.get(
[
policy_group.models[i].save_model.remote(step_dir, tc.chat_template_name, tokenizer)
for i in range(args.world_size)
]
)
if args.try_launch_beaker_eval_jobs_on_weka and is_beaker_job():
leaderboard_name = f"{args.hf_repo_revision}_step_{training_step}"
for i in range(args.world_size):
Expand Down Expand Up @@ -1889,7 +1906,12 @@ def main(args: Args, tc: TokenizerConfig, model_config: ModelConfig, reward_fn:

print(f"Saving final model at step {training_step} to {args.output_dir}")
with Timer("[Main Thread] 🗡️ Saving model"):
ray.get([policy_group.models[i].save_model.remote(args.output_dir) for i in range(args.world_size)])
ray.get(
[
policy_group.models[i].save_model.remote(args.output_dir, tc.chat_template_name, tokenizer)
for i in range(args.world_size)
]
)
if args.try_launch_beaker_eval_jobs_on_weka and is_beaker_job():
leaderboard_name = args.hf_repo_revision
for i in range(args.world_size):
Expand Down
16 changes: 15 additions & 1 deletion open_instruct/ppo_vllm_thread_ray_gtrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
from transformers import (
AutoModelForCausalLM,
AutoModelForSequenceClassification,
GenerationConfig,
PreTrainedModel,
PreTrainedTokenizer,
get_scheduler,
Expand Down Expand Up @@ -1513,14 +1514,27 @@ def generate_with_engines(prompts: List[List[int]], sampling_params: SamplingPar
shutil.copytree(args.output_dir, "/output", dirs_exist_ok=True)
print("finished training")

def save_model(self, model_to_save: PreTrainedModel, output_dir: str) -> None:
def save_model(
self, model_to_save: PreTrainedModel, chat_template_name: str, tokenizer: PreTrainedTokenizer, output_dir: str
) -> None:
if self.rank == 0:
os.makedirs(output_dir, exist_ok=True)

# save model weights for ZeRO2/3
if hasattr(model_to_save, "module"):
model_to_save = model_to_save.module

if "olmo" in chat_template_name:
# New chat template has no bos token, and two eos tokens: <|im_end|> and <|endoftext|>
model_to_save.generation_config = GenerationConfig(
temperature=None,
top_p=None,
eos_token_id=[
tokenizer.convert_tokens_to_ids("<|im_end|>"),
tokenizer.convert_tokens_to_ids("<|endoftext|>"),
],
)

# gather parameters
output_state_dict = {}
for k, v in model_to_save.named_parameters():
Expand Down
Loading