diff --git a/examples/data_preprocess/full_hh_rlhf.py b/examples/data_preprocess/full_hh_rlhf.py
index daaa59cfde7..07e0884cb91 100644
--- a/examples/data_preprocess/full_hh_rlhf.py
+++ b/examples/data_preprocess/full_hh_rlhf.py
@@ -88,7 +88,7 @@ def generate_rl_dataset(target_hdfs_path_dir, local_dir='~/data/full_hh_rlhf/rl'
 
     train_dataset = dataset['train']
 
     data_source = 'Dahoas/full-hh-rlhf'
-    
+
     # add a row to each data item that represents a unique id
     def make_map_fn(split):
@@ -105,7 +105,7 @@ def process_fn(example, idx):
                 "ability": "alignment",
                 "reward_model": {
                     "style": "model",
-                    "ground_truth": response  # should not be used 
+                    "ground_truth": response  # should not be used
                 },
                 "extra_info": {
                     'split': split,
diff --git a/examples/sft/gsm8k/run_deepseek_6b7.sh b/examples/sft/gsm8k/run_deepseek_6b7.sh
index 8a28cc7d606..f944a141f27 100644
--- a/examples/sft/gsm8k/run_deepseek_6b7.sh
+++ b/examples/sft/gsm8k/run_deepseek_6b7.sh
@@ -2,10 +2,13 @@ set -x
 
 hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace to your own hdfs/local path
 
-TORCHRUN -m verl.trainer.fsdp_sft_trainer \
+nproc_per_node=$1
+
+torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
+    -m verl.trainer.fsdp_sft_trainer \
     data.train_files=$HOME/data/gsm8k/train.parquet \
     data.val_files=$HOME/data/gsm8k/test.parquet \
-    data.prompt_key=question \
+    data.prompt_key=prompt \
     data.response_key=answer \
     data.micro_batch_size=8 \
     model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
diff --git a/examples/sft/gsm8k/run_gemma_2b.sh b/examples/sft/gsm8k/run_gemma_2b.sh
index fb1b987f4a2..fb5773c94e8 100644
--- a/examples/sft/gsm8k/run_gemma_2b.sh
+++ b/examples/sft/gsm8k/run_gemma_2b.sh
@@ -4,10 +4,13 @@ set -x
 
 hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-2b-it/ # replace to your own hdfs/local path
 
-TORCHRUN -m verl.trainer.fsdp_sft_trainer \
+nproc_per_node=$1
+
+torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
+    -m verl.trainer.fsdp_sft_trainer \
     data.train_files=$HOME/data/gsm8k/train.parquet \
     data.val_files=$HOME/data/gsm8k/test.parquet \
-    data.prompt_key=question \
+    data.prompt_key=prompt \
     data.response_key=answer \
     data.micro_batch_size=32 \
     model.partial_pretrain=google/gemma-2b-it \
diff --git a/examples/sft/gsm8k/run_gemma_7b.sh b/examples/sft/gsm8k/run_gemma_7b.sh
index 4955c078c9c..82391361d45 100644
--- a/examples/sft/gsm8k/run_gemma_7b.sh
+++ b/examples/sft/gsm8k/run_gemma_7b.sh
@@ -2,10 +2,13 @@ set -x
 
 hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace to your own hdfs/local path
 
-TORCHRUN -m verl.trainer.fsdp_sft_trainer \
+nproc_per_node=$1
+
+torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
+    -m verl.trainer.fsdp_sft_trainer \
     data.train_files=$HOME/data/gsm8k/train.parquet \
     data.val_files=$HOME/data/gsm8k/test.parquet \
-    data.prompt_key=question \
+    data.prompt_key=prompt \
     data.response_key=answer \
     data.micro_batch_size=8 \
     model.partial_pretrain=google/gemma-1.1-7b-it \
diff --git a/verl/models/llama/__init__.py b/verl/models/llama/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/verl/models/llama/megatron/layers/parallel_linear.py b/verl/models/llama/megatron/layers/parallel_linear.py
index 82f9c2d0492..bfe5cf4e65e 100644
--- a/verl/models/llama/megatron/layers/parallel_linear.py
+++ b/verl/models/llama/megatron/layers/parallel_linear.py
@@ -1,5 +1,5 @@
 # Copyright 2024 Bytedance Ltd. and/or its affiliates
-# Copyright 2023 The vLLM team. 
+# Copyright 2023 The vLLM team.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/verl/third_party/vllm/vllm_v_0_3_1/config.py b/verl/third_party/vllm/vllm_v_0_3_1/config.py
index 734dfc7a2e4..1e1fead8628 100644
--- a/verl/third_party/vllm/vllm_v_0_3_1/config.py
+++ b/verl/third_party/vllm/vllm_v_0_3_1/config.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/config.py
-
 from typing import Optional, Union, ClassVar
 from dataclasses import dataclass
 import torch
diff --git a/verl/third_party/vllm/vllm_v_0_4_2/config.py b/verl/third_party/vllm/vllm_v_0_4_2/config.py
index 76ccd591298..6af04417b43 100644
--- a/verl/third_party/vllm/vllm_v_0_4_2/config.py
+++ b/verl/third_party/vllm/vllm_v_0_4_2/config.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/config.py
-
 import enum
 import json
 from typing import List, Optional, Union
diff --git a/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py b/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
index 7efb856906f..0830093bca6 100644
--- a/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
+++ b/verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
@@ -3,7 +3,6 @@
 # Adapted from
 # https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/parallel_state.py
 # Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-
 """Model and data parallel groups."""
 import os
 import torch
diff --git a/verl/trainer/ppo/actor/dp_actor.py b/verl/trainer/ppo/actor/dp_actor.py
index 02c3328e82c..885ff0032a0 100644
--- a/verl/trainer/ppo/actor/dp_actor.py
+++ b/verl/trainer/ppo/actor/dp_actor.py
@@ -46,9 +46,9 @@ def _forward_micro_batch(self, micro_batch, temperature):
         response_length = micro_batch['responses'].size(-1)
         with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
             output = self.actor_module(input_ids=micro_batch['input_ids'],
-                                               attention_mask=micro_batch['attention_mask'],
-                                               position_ids=micro_batch['position_ids'],
-                                               use_cache=False)  # prevent model thinks we are generating
+                                       attention_mask=micro_batch['attention_mask'],
+                                       position_ids=micro_batch['position_ids'],
+                                       use_cache=False)  # prevent the model from thinking we are generating
             logits = output.logits / temperature
             logits = logits[:, -response_length - 1:-1]
             log_probs = logprobs_from_logits(logits, micro_batch['responses'])
diff --git a/verl/trainer/ppo/ray_trainer.py b/verl/trainer/ppo/ray_trainer.py
index bd726499ca8..1316e141033 100644
--- a/verl/trainer/ppo/ray_trainer.py
+++ b/verl/trainer/ppo/ray_trainer.py
@@ -241,30 +241,30 @@ def _create_dataloader(self):
         # TODO: we have to make sure the batch size is divisible by the dp size
         from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn
         self.train_dataset = RLHFDataset(parquet_files=self.config.data.train_files,
-                                                 tokenizer=self.tokenizer,
-                                                 prompt_key=self.config.data.prompt_key,
-                                                 max_prompt_length=self.config.data.max_prompt_length,
-                                                 filter_prompts=True,
-                                                 return_raw_chat=self.config.data.get('return_raw_chat', False),
-                                                 truncation='error')
+                                         tokenizer=self.tokenizer,
+                                         prompt_key=self.config.data.prompt_key,
+                                         max_prompt_length=self.config.data.max_prompt_length,
+                                         filter_prompts=True,
+                                         return_raw_chat=self.config.data.get('return_raw_chat', False),
+                                         truncation='error')
         self.train_dataloader = DataLoader(dataset=self.train_dataset,
-                                                   batch_size=self.config.data.train_batch_size,
-                                                   shuffle=True,
-                                                   drop_last=True,
-                                                   collate_fn=collate_fn)
+                                           batch_size=self.config.data.train_batch_size,
+                                           shuffle=True,
+                                           drop_last=True,
+                                           collate_fn=collate_fn)
 
         self.val_dataset = RLHFDataset(parquet_files=self.config.data.val_files,
-                                             tokenizer=self.tokenizer,
-                                             prompt_key=self.config.data.prompt_key,
-                                             max_prompt_length=self.config.data.max_prompt_length,
-                                             filter_prompts=True,
-                                             return_raw_chat=self.config.data.get('return_raw_chat', False),
-                                             truncation='error')
+                                       tokenizer=self.tokenizer,
+                                       prompt_key=self.config.data.prompt_key,
+                                       max_prompt_length=self.config.data.max_prompt_length,
+                                       filter_prompts=True,
+                                       return_raw_chat=self.config.data.get('return_raw_chat', False),
+                                       truncation='error')
         self.val_dataloader = DataLoader(dataset=self.val_dataset,
-                                                 batch_size=self.config.data.val_batch_size,
-                                                 shuffle=True,
-                                                 drop_last=True,
-                                                 collate_fn=collate_fn)
+                                         batch_size=self.config.data.val_batch_size,
+                                         shuffle=True,
+                                         drop_last=True,
+                                         collate_fn=collate_fn)
 
         assert len(self.train_dataloader) >= 1
         assert len(self.val_dataloader) >= 1
diff --git a/verl/utils/dataset/rl_dataset.py b/verl/utils/dataset/rl_dataset.py
index d64f6b53e56..7ebc39a843c 100644
--- a/verl/utils/dataset/rl_dataset.py
+++ b/verl/utils/dataset/rl_dataset.py
@@ -154,9 +154,9 @@ def __getitem__(self, item):
 
     tokenizer = AutoTokenizer.from_pretrained(local_path)
 
     dataset = RLHFDataset(parquet_files='~/data/rlhf/gsm8k/train.parquet',
-                                tokenizer=tokenizer,
-                                prompt_key='prompt',
-                                max_prompt_length=256)
+                          tokenizer=tokenizer,
+                          prompt_key='prompt',
+                          max_prompt_length=256)
 
     dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, drop_last=True, collate_fn=collate_fn)
diff --git a/verl/utils/megatron_utils.py b/verl/utils/megatron_utils.py
index 2f08027d0d1..fcb6b65a79e 100644
--- a/verl/utils/megatron_utils.py
+++ b/verl/utils/megatron_utils.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Pretrain utilities."""
 from typing import Any, Dict
 import time
diff --git a/verl/utils/reward_score/gsm8k.py b/verl/utils/reward_score/gsm8k.py
index 680620b65e4..9e21d589672 100644
--- a/verl/utils/reward_score/gsm8k.py
+++ b/verl/utils/reward_score/gsm8k.py
@@ -1,5 +1,5 @@
 # Copyright 2024 Bytedance Ltd. and/or its affiliates
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/verl/version/version b/verl/version/version
index 6da28dde76d..23f753615d2 100644
--- a/verl/version/version
+++ b/verl/version/version
@@ -1 +1 @@
-0.1.1
\ No newline at end of file
+0.1.pre
\ No newline at end of file
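
Usage sketch for the updated SFT launch scripts above: each script now takes the number of
processes per node as its first positional argument and forwards it to torchrun as
--nproc_per_node. Assuming the GSM8K parquet files already exist under $HOME/data/gsm8k
(the GPU count of 8 below is illustrative, not part of the patch):

    # single-node gemma-2b-it SFT run on 8 GPUs; "8" becomes --nproc_per_node=8
    bash examples/sft/gsm8k/run_gemma_2b.sh 8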