Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/data_preprocess/full_hh_rlhf.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def generate_rl_dataset(target_hdfs_path_dir, local_dir='~/data/full_hh_rlhf/rl'
train_dataset = dataset['train']

data_source = 'Dahoas/full-hh-rlhf'

# add a row to each data item that represents a unique id
def make_map_fn(split):

Expand All @@ -105,7 +105,7 @@ def process_fn(example, idx):
"ability": "alignment",
"reward_model": {
"style": "model",
"ground_truth": response # should not be used
"ground_truth": response # should not be used
},
"extra_info": {
'split': split,
Expand Down
7 changes: 5 additions & 2 deletions examples/sft/gsm8k/run_deepseek_6b7.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ set -x

hdfs_path=hdfs://user/verl/experiments/gsm8k/deepseek-coder-6.7b-instruct/ # replace with your own hdfs/local path

TORCHRUN -m verl.trainer.fsdp_sft_trainer \
nproc_per_node=$1

torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
-m verl.trainer.fsdp_sft_trainer \
data.train_files=$HOME/data/gsm8k/train.parquet \
data.val_files=$HOME/data/gsm8k/test.parquet \
data.prompt_key=question \
data.prompt_key=prompt \
data.response_key=answer \
data.micro_batch_size=8 \
model.partial_pretrain=deepseek-ai/deepseek-coder-6.7b-instruct \
Expand Down
7 changes: 5 additions & 2 deletions examples/sft/gsm8k/run_gemma_2b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ set -x

hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-2b-it/ # replace with your own hdfs/local path

TORCHRUN -m verl.trainer.fsdp_sft_trainer \
nproc_per_node=$1

torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
-m verl.trainer.fsdp_sft_trainer \
data.train_files=$HOME/data/gsm8k/train.parquet \
data.val_files=$HOME/data/gsm8k/test.parquet \
data.prompt_key=question \
data.prompt_key=prompt \
data.response_key=answer \
data.micro_batch_size=32 \
model.partial_pretrain=google/gemma-2b-it \
Expand Down
7 changes: 5 additions & 2 deletions examples/sft/gsm8k/run_gemma_7b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ set -x

hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-1.1-7b-it/ # replace with your own hdfs/local path

TORCHRUN -m verl.trainer.fsdp_sft_trainer \
nproc_per_node=$1

torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
-m verl.trainer.fsdp_sft_trainer \
data.train_files=$HOME/data/gsm8k/train.parquet \
data.val_files=$HOME/data/gsm8k/test.parquet \
data.prompt_key=question \
data.prompt_key=prompt \
data.response_key=answer \
data.micro_batch_size=8 \
model.partial_pretrain=google/gemma-1.1-7b-it \
Expand Down
Empty file added verl/models/llama/__init__.py
Empty file.
2 changes: 1 addition & 1 deletion verl/models/llama/megatron/layers/parallel_linear.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down
1 change: 0 additions & 1 deletion verl/third_party/vllm/vllm_v_0_3_1/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/config.py


from typing import Optional, Union, ClassVar
from dataclasses import dataclass
import torch
Expand Down
1 change: 0 additions & 1 deletion verl/third_party/vllm/vllm_v_0_4_2/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
# Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/config.py


import enum
import json
from typing import List, Optional, Union
Expand Down
1 change: 0 additions & 1 deletion verl/third_party/vllm/vllm_v_0_5_4/parallel_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# Adapted from
# https://github.com/NVIDIA/Megatron-LM/blob/main/megatron/core/parallel_state.py
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

"""Model and data parallel groups."""
import os
import torch
Expand Down
6 changes: 3 additions & 3 deletions verl/trainer/ppo/actor/dp_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ def _forward_micro_batch(self, micro_batch, temperature):
response_length = micro_batch['responses'].size(-1)
with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
output = self.actor_module(input_ids=micro_batch['input_ids'],
attention_mask=micro_batch['attention_mask'],
position_ids=micro_batch['position_ids'],
use_cache=False) # prevent the model from thinking we are generating
attention_mask=micro_batch['attention_mask'],
position_ids=micro_batch['position_ids'],
use_cache=False) # prevent the model from thinking we are generating
logits = output.logits / temperature
logits = logits[:, -response_length - 1:-1]
log_probs = logprobs_from_logits(logits, micro_batch['responses'])
Expand Down
40 changes: 20 additions & 20 deletions verl/trainer/ppo/ray_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,30 +241,30 @@ def _create_dataloader(self):
# TODO: we have to make sure the batch size is divisible by the dp size
from verl.utils.dataset.rl_dataset import RLHFDataset, collate_fn
self.train_dataset = RLHFDataset(parquet_files=self.config.data.train_files,
tokenizer=self.tokenizer,
prompt_key=self.config.data.prompt_key,
max_prompt_length=self.config.data.max_prompt_length,
filter_prompts=True,
return_raw_chat=self.config.data.get('return_raw_chat', False),
truncation='error')
tokenizer=self.tokenizer,
prompt_key=self.config.data.prompt_key,
max_prompt_length=self.config.data.max_prompt_length,
filter_prompts=True,
return_raw_chat=self.config.data.get('return_raw_chat', False),
truncation='error')
self.train_dataloader = DataLoader(dataset=self.train_dataset,
batch_size=self.config.data.train_batch_size,
shuffle=True,
drop_last=True,
collate_fn=collate_fn)
batch_size=self.config.data.train_batch_size,
shuffle=True,
drop_last=True,
collate_fn=collate_fn)

self.val_dataset = RLHFDataset(parquet_files=self.config.data.val_files,
tokenizer=self.tokenizer,
prompt_key=self.config.data.prompt_key,
max_prompt_length=self.config.data.max_prompt_length,
filter_prompts=True,
return_raw_chat=self.config.data.get('return_raw_chat', False),
truncation='error')
tokenizer=self.tokenizer,
prompt_key=self.config.data.prompt_key,
max_prompt_length=self.config.data.max_prompt_length,
filter_prompts=True,
return_raw_chat=self.config.data.get('return_raw_chat', False),
truncation='error')
self.val_dataloader = DataLoader(dataset=self.val_dataset,
batch_size=self.config.data.val_batch_size,
shuffle=True,
drop_last=True,
collate_fn=collate_fn)
batch_size=self.config.data.val_batch_size,
shuffle=True,
drop_last=True,
collate_fn=collate_fn)

assert len(self.train_dataloader) >= 1
assert len(self.val_dataloader) >= 1
Expand Down
6 changes: 3 additions & 3 deletions verl/utils/dataset/rl_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ def __getitem__(self, item):
tokenizer = AutoTokenizer.from_pretrained(local_path)

dataset = RLHFDataset(parquet_files='~/data/rlhf/gsm8k/train.parquet',
tokenizer=tokenizer,
prompt_key='prompt',
max_prompt_length=256)
tokenizer=tokenizer,
prompt_key='prompt',
max_prompt_length=256)

dataloader = DataLoader(dataset=dataset, batch_size=16, shuffle=True, drop_last=True, collate_fn=collate_fn)

Expand Down
1 change: 0 additions & 1 deletion verl/utils/megatron_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Pretrain utilities."""
from typing import Any, Dict
import time
Expand Down
2 changes: 1 addition & 1 deletion verl/utils/reward_score/gsm8k.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright 2024 Bytedance Ltd. and/or its affiliates
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down
2 changes: 1 addition & 1 deletion verl/version/version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.1
0.1.pre