benchmark/benchmark_level1.sh (2 changes: 1 addition & 1 deletion)

@@ -33,7 +33,7 @@ python benchmark/benchmark.py \
     --slurm-template-path benchmark/trl.slurm_template

 python benchmark/benchmark.py \
-    --command "python examples/scripts/reward_modeling.py --model_name_or_path=facebook/opt-350m --output_dir="reward_modeling_anthropic_hh" --per_device_train_batch_size=64 --num_train_epochs=1 --gradient_accumulation_steps=16 --gradient_checkpointing=True --learning_rate=1.41e-5 --report_to="wandb" --remove_unused_columns=False --optim="adamw_torch" --logging_steps=10 --evaluation_strategy="steps" --max_length=512" \
+    --command "python examples/scripts/reward_modeling.py --model_name_or_path=facebook/opt-350m --output_dir="reward_modeling_anthropic_hh" --per_device_train_batch_size=64 --num_train_epochs=1 --gradient_accumulation_steps=16 --gradient_checkpointing=True --learning_rate=1.41e-5 --report_to="wandb" --remove_unused_columns=False --optim="adamw_torch" --logging_steps=10 --eval_strategy="steps" --max_length=512" \
     --num-seeds 3 \
    --start-seed 1 \
    --workers 10 \
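Every change in this diff is the same mechanical rename: the evaluation_strategy argument of transformers.TrainingArguments becomes eval_strategy. To my understanding the old spelling was deprecated upstream in transformers v4.41 in favor of the new one, though the diff itself does not name a version. A minimal sketch of the renamed argument, assuming a transformers release from that rename onward:

    # minimal sketch, assuming transformers >= 4.41, where the rename landed
    from transformers import TrainingArguments

    training_args = TrainingArguments(
        output_dir="out",
        eval_strategy="steps",  # formerly: evaluation_strategy="steps"
        eval_steps=500,
    )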
@@ -118,7 +118,7 @@ class ScriptArguments:
     per_device_eval_batch_size=script_args.per_device_eval_batch_size,
     num_train_epochs=script_args.num_train_epochs,
     weight_decay=script_args.weight_decay,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     eval_steps=500,
     save_strategy="steps",
     save_steps=500,
@@ -148,7 +148,7 @@ def run_training(args, train_data, val_data):
     training_args = TrainingArguments(
         output_dir=args.output_dir,
         dataloader_drop_last=True,
-        evaluation_strategy="steps",
+        eval_strategy="steps",
         max_steps=args.max_steps,
         eval_steps=args.eval_freq,
         save_steps=args.save_freq,
@@ -186,7 +186,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
     gradient_accumulation_steps=script_args.gradient_accumulation_steps,
     gradient_checkpointing=script_args.gradient_checkpointing,
     learning_rate=script_args.learning_rate,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     eval_steps=script_args.eval_steps,
     output_dir=script_args.output_dir,
     report_to=script_args.report_to,
examples/scripts/reward_modeling.py (2 changes: 1 addition & 1 deletion)

@@ -24,7 +24,7 @@
     --remove_unused_columns=False \
     --optim="adamw_torch" \
     --logging_steps=10 \
-    --evaluation_strategy="steps" \
+    --eval_strategy="steps" \
     --eval_steps=500 \
     --max_length=512 \
 """
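The example scripts expose TrainingArguments fields as command-line flags (presumably through HfArgumentParser), so renaming the field renames the flag shown in the docstring above as well. A hedged sketch of that mechanism using only standard transformers pieces:

    # hedged sketch: HfArgumentParser builds CLI flags from dataclass fields,
    # so --evaluation_strategy becomes --eval_strategy along with the field
    from transformers import HfArgumentParser, TrainingArguments

    parser = HfArgumentParser(TrainingArguments)
    (training_args,) = parser.parse_args_into_dataclasses(
        ["--output_dir", "out", "--eval_strategy", "steps", "--eval_steps", "500"]
    )
    print(training_args.eval_strategy)  # IntervalStrategy.STEPS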
tests/slow/test_dpo_slow.py (6 changes: 3 additions & 3 deletions)

@@ -67,7 +67,7 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
     remove_unused_columns=False,
     gradient_accumulation_steps=2,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     fp16=True,
     logging_strategy="no",
     report_to="none",

@@ -121,7 +121,7 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
     remove_unused_columns=False,
     gradient_accumulation_steps=2,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     fp16=True,
     logging_strategy="no",
     report_to="none",

@@ -185,7 +185,7 @@ def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gra
     remove_unused_columns=False,
     gradient_accumulation_steps=2,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     fp16=True,
     logging_strategy="no",
     report_to="none",
tests/test_cpo_trainer.py (4 changes: 2 additions & 2 deletions)

@@ -95,7 +95,7 @@ def test_cpo_trainer(self, name, loss_type):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     loss_type=loss_type,
 )

@@ -152,7 +152,7 @@ def test_cpo_trainer_with_lora(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

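In the test files the renamed argument sits next to trainer-specific fields such as beta and loss_type, which suggests these constructors are TRL config classes (CPOConfig here, with analogous KTO, ORPO, and DPO configs) that subclass transformers.TrainingArguments and therefore inherit the rename. A hedged sketch, assuming the CPOConfig exported by trl:

    # hedged sketch: CPOConfig subclasses TrainingArguments, so the renamed
    # argument is passed the same way as in the tests above
    from trl import CPOConfig

    config = CPOConfig(
        output_dir="out",
        eval_strategy="steps",  # formerly: evaluation_strategy="steps"
        learning_rate=9e-1,
        beta=0.1,
    )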
tests/test_dpo_trainer.py (26 changes: 13 additions & 13 deletions)

@@ -105,7 +105,7 @@ def test_dpo_trainer(self, name, loss_type, pre_compute):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     loss_type=loss_type,
     precompute_ref_log_probs=pre_compute,

@@ -156,7 +156,7 @@ def test_dpo_trainer_without_providing_ref_model(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     precompute_ref_log_probs=True,
 )

@@ -206,7 +206,7 @@ def test_dpo_trainer_without_providing_ref_model_with_lora(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     precompute_ref_log_probs=True,
 )

@@ -246,7 +246,7 @@ def test_dpo_trainer_padding_token_is_none(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -281,7 +281,7 @@ def test_dpo_trainer_w_dataset_num_proc(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     dataset_num_proc=5,
 )

@@ -318,7 +318,7 @@ def test_dpo_trainer_generate_during_eval_no_wandb(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     generate_during_eval=True,
 )

@@ -364,7 +364,7 @@ def test_dpo_lora_save(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     precompute_ref_log_probs=True,
 )

@@ -423,7 +423,7 @@ def test_dpo_lora_bf16_autocast_llama(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     bf16=True,
     beta=0.1,
     generate_during_eval=True,

@@ -495,7 +495,7 @@ def test_dpo_lora_bf16_autocast(self, name, loss_type, pre_compute, gen_during_eval):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     bf16=True,
     beta=0.1,
     generate_during_eval=gen_during_eval,

@@ -548,7 +548,7 @@ def test_dpo_lora_tags(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -583,7 +583,7 @@ def test_dpo_tags(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -628,7 +628,7 @@ def test_dpo_lora_force_use_ref(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -654,7 +654,7 @@ def test_dpo_lora_force_use_ref(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     force_use_ref_model=True,
 )
tests/test_kto_trainer.py (12 changes: 6 additions & 6 deletions)

@@ -105,7 +105,7 @@ def test_kto_trainer(self, name, loss_type, pre_compute, eval_dataset):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     precompute_ref_log_probs=pre_compute,
     loss_type=loss_type,

@@ -153,7 +153,7 @@ def test_tokenize_and_process_tokens(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -234,7 +234,7 @@ def test_kto_trainer_without_providing_ref_model(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -331,7 +331,7 @@ def test_kto_trainer_without_providing_ref_model_with_lora(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -371,7 +371,7 @@ def test_kto_trainer_generate_during_eval_no_wandb(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
     generate_during_eval=True,
 )

@@ -417,7 +417,7 @@ def test_kto_lora_save(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )
tests/test_orpo_trainer.py (4 changes: 2 additions & 2 deletions)

@@ -88,7 +88,7 @@ def test_orpo_trainer(self, name):
     remove_unused_columns=False,
     gradient_accumulation_steps=1,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )

@@ -144,7 +144,7 @@ def test_orpo_trainer_with_lora(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     beta=0.1,
 )
tests/test_reward_trainer.py (8 changes: 4 additions & 4 deletions)

@@ -47,7 +47,7 @@ def test_reward_trainer(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
 )

 # fmt: off

@@ -124,7 +124,7 @@ def test_reward_trainer_peft(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=2,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
 )

 # fmt: off

@@ -268,7 +268,7 @@ def test_reward_trainer_margin(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
 )

 # fmt: off

@@ -319,7 +319,7 @@ def test_reward_trainer_tags(self):
     remove_unused_columns=False,
     gradient_accumulation_steps=4,
     learning_rate=9e-1,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
 )

 # fmt: off
tests/test_rich_progress_callback.py (2 changes: 1 addition & 1 deletion)

@@ -32,7 +32,7 @@ def test_rich_progress_callback_logging(self):
     per_device_eval_batch_size=2,
     per_device_train_batch_size=2,
     num_train_epochs=4,
-    evaluation_strategy="steps",
+    eval_strategy="steps",
     eval_steps=1,
     logging_strategy="steps",
     logging_steps=1,
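Because the diff switches spellings outright rather than accepting both, it implicitly raises the minimum supported transformers version. A project that has to span the rename could feature-detect instead; a sketch of that fallback, not something this PR does:

    # hedged sketch: support both spellings across transformers versions
    import inspect
    from transformers import TrainingArguments

    kwargs = {"output_dir": "out", "eval_steps": 500}
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters:
        kwargs["eval_strategy"] = "steps"  # newer transformers (post-rename)
    else:
        kwargs["evaluation_strategy"] = "steps"  # older transformers
    training_args = TrainingArguments(**kwargs)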