From 8768fe6176ea6cb14a08d961c9b57096c80a00ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 17 May 2024 15:17:15 +0000 Subject: [PATCH 01/43] Remove extra whitespaces --- examples/scripts/vsft_llava.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/scripts/vsft_llava.py b/examples/scripts/vsft_llava.py index 85cb98d5f3c..8a26477737d 100644 --- a/examples/scripts/vsft_llava.py +++ b/examples/scripts/vsft_llava.py @@ -32,7 +32,7 @@ # peft: python examples/scripts/vsft_llava.py \ - --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ + --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ --model_name_or_path="llava-hf/llava-1.5-7b-hf" \ --report_to="wandb" \ --learning_rate=1.4e-5 \ @@ -45,11 +45,11 @@ --gradient_checkpointing \ --remove_unused_columns=False \ --torch_dtype=float16 \ - --fp16=True \ + --fp16=True \ --use_peft=True \ --lora_r=64 \ --lora_alpha=16 \ - --lora_target_modules=all-linear" + --lora_target_modules=all-linear # evaluation: From 5d43f2b5615688f75232cdacff1c38837920f729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 17 May 2024 18:06:42 +0000 Subject: [PATCH 02/43] idefics --- examples/scripts/vsft_idefics2.py | 226 ++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 examples/scripts/vsft_idefics2.py diff --git a/examples/scripts/vsft_idefics2.py b/examples/scripts/vsft_idefics2.py new file mode 100644 index 00000000000..f74867afc64 --- /dev/null +++ b/examples/scripts/vsft_idefics2.py @@ -0,0 +1,226 @@ +# flake8: noqa +# Copyright 2024 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +# regular: +python examples/scripts/vsft_idefics2.py \ + --dataset_name=HuggingFaceH4/cord-v2 \ + --model_name_or_path=HuggingFaceM4/idefics2-8b \ + --report_to=wandb \ + --learning_rate=1e-4 \ + --per_device_train_batch_size=2 \ + --gradient_accumulation_steps=8 \ + --output_dir=data/vsft-idefics2 \ + --logging_steps=5 \ + --num_train_epochs=1 \ + --push_to_hub \ + --gradient_checkpointing \ + --remove_unused_columns=False \ + --torch_dtype=float16 + + +# peft: +python examples/scripts/vsft_idefics2.py \ + --model_name_or_path="HuggingFaceM4/idefics2-tfrm-compatible" \ + --report_to="wandb" \ + --learning_rate=1.4e-5 \ + --per_device_train_batch_size=8 \ + --gradient_accumulation_steps=1 \ + --output_dir="data/vsft-llava-1.5-7b-hf" \ + --logging_steps=5 \ + --num_train_epochs=1 \ + --push_to_hub \ + --gradient_checkpointing \ + --remove_unused_columns=False \ + --torch_dtype=float16 \ + --fp16=True \ + --dataset_name=HuggingFaceH4/llava-instruct-mix-vsft \ + --use_peft=True \ + --lora_r=64 \ + --lora_alpha=16 \ + --lora_target_modules=all-linear" + +# evaluation: + +To evaluate, first install the lmms-eval framework: pip install git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git +then run: +accelerate launch --num_processes=8 -m lmms_eval \ + --model llava_hf \ + --model_args pretrained=llava-hf/llava-1.5-7b-hf \ + --tasks mmbench \ + --batch_size 1 \ + --output_path ./logs/ \ + --log_sample +""" +import logging +import os +from contextlib import nullcontext + +TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) + +from trl.commands.cli_utils import init_zero_verbose, TrlParser, SFTScriptArguments + +if TRL_USE_RICH: + init_zero_verbose() + FORMAT = "%(message)s" + + from rich.console import Console + from rich.logging import RichHandler + +import torch +from accelerate import Accelerator +from datasets import load_dataset + +from tqdm.rich import tqdm +from transformers import AutoTokenizer, AutoProcessor, Idefics2ForConditionalGeneration + +from trl import ( + ModelConfig, + RichProgressCallback, + SFTTrainer, + SFTConfig, + get_peft_config, + get_quantization_config, + get_kbit_device_map, +) + +tqdm.pandas() + +if TRL_USE_RICH: + logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()], level=logging.INFO) + + +if __name__ == "__main__": + parser = TrlParser((SFTScriptArguments, SFTConfig, ModelConfig)) + args, training_args, model_config = parser.parse_args_and_config() + training_args.gradient_checkpointing_kwargs = dict(use_reentrant=True) + # Force use our print callback + if TRL_USE_RICH: + training_args.disable_tqdm = True + console = Console() + + ################ + # Model, Tokenizer & Processor + ################ + # IDEFICS2_CHAT_TEMPLATE = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}""" + + torch_dtype = model_config.torch_dtype if model_config.torch_dtype in ["auto", None] else getattr(torch, model_config.torch_dtype) + quantization_config = get_quantization_config(model_config) + model_kwargs = dict( + revision=model_config.model_revision, + trust_remote_code=model_config.trust_remote_code, + attn_implementation=model_config.attn_implementation, + torch_dtype=torch_dtype, + use_cache=False, + device_map=get_kbit_device_map() if quantization_config is not None else None, + quantization_config=quantization_config, + ) + # tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path, use_fast=True) + # tokenizer.chat_template = IDEFICS2_CHAT_TEMPLATE + processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) + # processor.tokenizer = tokenizer + + # model = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + + ###### + from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration + from peft import LoraConfig + + bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16) + model = Idefics2ForConditionalGeneration.from_pretrained( + "HuggingFaceM4/idefics2-8b", torch_dtype=torch.float16, quantization_config=bnb_config + ) + lora_config = LoraConfig( + r=8, + lora_alpha=8, + lora_dropout=0.1, + target_modules=".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$", + use_dora=False, + init_lora_weights="gaussian", + ) + model.add_adapter(lora_config) + model.enable_adapters() + ###### + + ################ + # Create a data collator to encode text and image pairs + ################ + + class Idefics2DataCollator: + def __init__(self, processor): + self.processor = processor + + def __call__(self, examples): + texts = [] + images = [] + for example in examples: + if len(example["images"]) > 1: + raise ValueError("This collator only supports one image per example") + messages = example["messages"] + text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) + texts.append(text) + images.append([example["images"][0]]) + + batch = self.processor(texts, images, return_tensors="pt", padding=True) + + labels = batch["input_ids"].clone() + if self.processor.tokenizer.pad_token_id is not None: + labels[labels == self.processor.tokenizer.pad_token_id] = -100 + batch["labels"] = labels + + return batch + + data_collator = Idefics2DataCollator(processor) + + ################ + # Dataset + ################ + raw_datasets = load_dataset(args.dataset_name) + train_dataset = raw_datasets["train"] + eval_dataset = raw_datasets["test"] + + ################ + # Optional rich context managers + ############### + init_context = nullcontext() if not TRL_USE_RICH else console.status("[bold green]Initializing the SFTTrainer...") + save_context = ( + nullcontext() + if not TRL_USE_RICH + else console.status(f"[bold green]Training completed! Saving the model to {training_args.output_dir}") + ) + + ################ + # Training + ################ + with init_context: + trainer = SFTTrainer( + model=model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + dataset_text_field="text", # need a dummy field + tokenizer=processor.tokenizer, + # peft_config=get_peft_config(model_config), + callbacks=[RichProgressCallback] if TRL_USE_RICH else None, + data_collator=data_collator, + dataset_kwargs={"skip_prepare_dataset": True}, + ) + + trainer.train() + + with save_context: + trainer.save_model(training_args.output_dir) + trainer.push_to_hub() + if Accelerator().is_main_process: + processor.push_to_hub(training_args.hub_model_id) From f5a3237f4ffcccd049aee53e40ef28d766a9e242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 27 May 2024 18:09:59 +0000 Subject: [PATCH 03/43] vdpo --- examples/scripts/vdpo.py | 183 ++++++++++++++++++++++++++++++++++++ trl/trainer/model_config.py | 4 +- 2 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 examples/scripts/vdpo.py diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py new file mode 100644 index 00000000000..f0510e0d7b3 --- /dev/null +++ b/examples/scripts/vdpo.py @@ -0,0 +1,183 @@ +# flake8: noqa +# Copyright 2023 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +# regular: # OOM error +python examples/scripts/vdpo.py \ + --dataset_name=HuggingFaceH4/vqa_preferences \ + --model_name_or_path=HuggingFaceM4/idefics2-8b \ + --per_device_train_batch_size 4 \ + --learning_rate 1e-3 \ + --gradient_accumulation_steps 1 \ + --logging_steps 10 \ + --eval_steps 500 \ + --output_dir="dpo_anthropic_hh" \ + --warmup_steps 150 \ + --report_to wandb \ + --bf16 \ + --logging_first_step \ + --no_remove_unused_columns + +# peft: +python examples/scripts/vdpo.py \ + --dataset_name=HuggingFaceH4/vqa_preferences \ + --model_name_or_path=HuggingFaceM4/idefics2-8b \ + --per_device_train_batch_size 4 \ + --learning_rate 1e-3 \ + --gradient_accumulation_steps 1 \ + --logging_steps 10 \ + --eval_steps 500 \ + --output_dir="dpo_anthropic_hh" \ + --optim rmsprop \ + --warmup_steps 150 \ + --report_to wandb \ + --bf16 \ + --logging_first_step \ + --no_remove_unused_columns \ + --use_peft \ + --load_in_4bit \ + --lora_target_module .*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$ + +""" + +import logging +import multiprocessing +import os +from contextlib import nullcontext + +TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) + +from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser + +if TRL_USE_RICH: + init_zero_verbose() + FORMAT = "%(message)s" + + from rich.console import Console + from rich.logging import RichHandler + +import torch +from datasets import load_dataset +from transformers import Idefics2ForConditionalGeneration, AutoTokenizer, AutoProcessor + +from trl import ( + DPOConfig, + DPOTrainer, + ModelConfig, + RichProgressCallback, + get_kbit_device_map, + get_peft_config, + get_quantization_config, +) + + +if TRL_USE_RICH: + logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()], level=logging.INFO) + + +if __name__ == "__main__": + parser = TrlParser((DPOScriptArguments, DPOConfig, ModelConfig)) + args, training_args, model_config = parser.parse_args_and_config() + + # Force use our print callback + if TRL_USE_RICH: + training_args.disable_tqdm = True + console = Console() + + ################ + # Model & Tokenizer + ################ + torch_dtype = ( + model_config.torch_dtype + if model_config.torch_dtype in ["auto", None] + else getattr(torch, model_config.torch_dtype) + ) + quantization_config = get_quantization_config(model_config) + model_kwargs = dict( + revision=model_config.model_revision, + trust_remote_code=model_config.trust_remote_code, + attn_implementation=model_config.attn_implementation, + torch_dtype=torch_dtype, + use_cache=False if training_args.gradient_checkpointing else True, + device_map=get_kbit_device_map() if quantization_config is not None else None, + quantization_config=quantization_config, + ) + model = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + peft_config = get_peft_config(model_config) + if peft_config is None: + model_ref = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + else: + model_ref = None + tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) + processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + if tokenizer.chat_template is None: + tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}" + if args.ignore_bias_buffers: + # torch distributed hack + model._ddp_params_and_buffers_to_ignore = [ + name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool + ] + + ################ + # Optional rich context managers + ############### + init_context = nullcontext() if not TRL_USE_RICH else console.status("[bold green]Initializing the DPOTrainer...") + save_context = ( + nullcontext() + if not TRL_USE_RICH + else console.status(f"[bold green]Training completed! Saving the model to {training_args.output_dir}") + ) + + ################ + # Dataset + ################ + ds = load_dataset(args.dataset_name) + if args.sanity_check: + for key in ds: + ds[key] = ds[key].select(range(50)) + + def process(row): + row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) + row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) + return row + + ds = ds.map( + process, + # num_proc=multiprocessing.cpu_count(), + load_from_cache_file=False, + ) + train_dataset = ds[args.dataset_train_split] + eval_dataset = ds[args.dataset_test_split] + + ################ + # Training + ################ + with init_context: + trainer = DPOTrainer( + model, + model_ref, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + tokenizer=tokenizer, + peft_config=get_peft_config(model_config), + callbacks=[RichProgressCallback] if TRL_USE_RICH else None, + ) + + trainer.train() + + with save_context: + trainer.save_model(training_args.output_dir) diff --git a/trl/trainer/model_config.py b/trl/trainer/model_config.py index c30fa4ae497..b16a07421db 100644 --- a/trl/trainer/model_config.py +++ b/trl/trainer/model_config.py @@ -86,5 +86,5 @@ def __post_init__(self): if self.load_in_8bit and self.load_in_4bit: raise ValueError("You can't use 8 bit and 4 bit precision at the same time") - if self.lora_target_modules == ["all-linear"]: - self.lora_target_modules = "all-linear" + if isinstance(self.lora_target_modules, list) and len(self.lora_target_modules) == 1: + self.lora_target_modules = self.lora_target_modules[0] From 682c0345c53ff4656908fbcdbc673a26c20e208e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 27 May 2024 18:17:28 +0000 Subject: [PATCH 04/43] sft idefics --- examples/scripts/sft_idefics.py | 113 ++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 examples/scripts/sft_idefics.py diff --git a/examples/scripts/sft_idefics.py b/examples/scripts/sft_idefics.py new file mode 100644 index 00000000000..6a1d5eea575 --- /dev/null +++ b/examples/scripts/sft_idefics.py @@ -0,0 +1,113 @@ +""" +`CUDA_VISIBLE_DEVICES=1 python mre.py` works fine +without bnb: `CUDA_VISIBLE_DEVICES=1 python mre.py` doesn't work (diverges) +`accelerate launch mre.py` diverges + +Seems to be training without bnb that fails! +""" + +import torch +from datasets import load_dataset +from peft import LoraConfig +from transformers import AutoProcessor, Idefics2ForConditionalGeneration, Trainer, TrainingArguments, BitsAndBytesConfig +from trl import get_kbit_device_map + +USE_QLORA = True # QLora + +if __name__ == "__main__": + # Load the model and processor + model_name = "HuggingFaceM4/idefics2-8b" + if USE_QLORA: + quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16) + else: + quantization_config = None + model = Idefics2ForConditionalGeneration.from_pretrained( + model_name, + torch_dtype=torch.float16, + quantization_config=quantization_config, + device_map=get_kbit_device_map() if quantization_config is not None else None, + ) + lora_config = LoraConfig( + r=8, + lora_alpha=8, + lora_dropout=0.1, + target_modules=".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$", + init_lora_weights="gaussian", + use_dora=False if USE_QLORA else True, + ) + model.add_adapter(lora_config) + model.enable_adapters() + + processor = AutoProcessor.from_pretrained(model_name, do_image_splitting=False) + + # Load a dataset + dataset = load_dataset("HuggingFaceH4/llava-instruct-mix-vsft") + # dataset = load_dataset("HuggingFaceH4/cord-v2") + + # Process the dataset + def data_collator(examples, add_generation_prompt=False): + messages = [example["messages"] for example in examples] + images = [example["images"] for example in examples] + text = processor.apply_chat_template(messages, add_generation_prompt=add_generation_prompt) + batch = processor(text, images, return_tensors="pt", padding=True) + labels = batch["input_ids"].clone() + if processor.tokenizer.pad_token_id is not None: + image_token = processor.tokenizer("", add_special_tokens=False).input_ids[0] + labels[labels == processor.tokenizer.pad_token_id] = image_token + batch["labels"] = labels + return batch + + # Test before training + # example = dataset["test"][0] + # example["messages"] = example["messages"][:-1] # remove the last message (it's the answer) + # example["images"][0].save("image.jpg") + # inputs = data_collator([example], add_generation_prompt=True) + # exit_condition = processor.tokenizer("", add_special_tokens=False).input_ids + # bad_words_ids = processor.tokenizer(["", ""], add_special_tokens=False).input_ids + # generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, max_new_tokens=1000) + # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True) + # for i, t in enumerate(generated_text): + # print(f"{i}:\n{t}\n") + + trainer = Trainer( + model=model, + args=TrainingArguments( + output_dir="./results", + per_device_train_batch_size=1, + gradient_accumulation_steps=8, + logging_steps=10, + num_train_epochs=1, + logging_dir="./logs", + remove_unused_columns=False, + max_grad_norm=1.0, + ), + train_dataset=dataset["train"], + data_collator=data_collator, + ) + + trainer.train() + + # Save the model + model.save_pretrained("idefics2-8b-fst-llava-instruct-mix") + + # Test after training + # example = dataset["test"][0] + # example["messages"] = example["messages"][:-1] # remove the last message (it's the answer) + # inputs = data_collator([example], add_generation_prompt=True) + # exit_condition = processor.tokenizer("", add_special_tokens=False).input_ids + # bad_words_ids = processor.tokenizer(["", ""], add_special_tokens=False).input_ids + # generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, max_new_tokens=1000) + # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True) + # for i, t in enumerate(generated_text): + # print(f"{i}:\n{t}\n") + + +# accelerate launch python sft_idefics.py +# OK + +# Issues: + +# python mre.py +# TypeError: DynamicCache.__init__() takes 1 positional argument but 2 were given + +# python mre.py with LORA and no QLORA, diverges (all numbers of devices) From bf01bf306a804b5d0aba34ac268bf2b85d8518f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 30 May 2024 18:25:18 +0000 Subject: [PATCH 05/43] pad with test --- tests/test_utils.py | 60 ++++++++++++++++++++++++++++++++++ trl/trainer/utils.py | 78 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 128 insertions(+), 10 deletions(-) create mode 100644 tests/test_utils.py diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000000..e50e383db7f --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,60 @@ +import unittest + +import torch + +from trl.trainer.utils import pad + + +class TestPad(unittest.TestCase): + def test_pad_1_dim_left(self): + x = torch.tensor([1, 2, 3]) + y = torch.tensor([4, 5]) + output = pad((x, y), padding_value=0, padding_side="left") + expected = torch.tensor([[1, 2, 3], [0, 4, 5]]) + self.assertTrue(torch.equal(output, expected)) + + def test_pad_1_dim_right(self): + x = torch.tensor([1, 2, 3]) + y = torch.tensor([4, 5]) + output = pad((x, y), padding_value=0, padding_side="right") + expected = torch.tensor([[1, 2, 3], [4, 5, 0]]) + self.assertTrue(torch.equal(output, expected)) + + def test_pad_2_dim_left(self): + x = torch.tensor([[1, 2], [3, 4]]) + y = torch.tensor([[5, 6]]) + output = pad((x, y), padding_value=0, padding_side="left") + expected = torch.tensor( + [ + [[1, 2], [3, 4]], + [[0, 0], [5, 6]], + ] + ) + self.assertTrue(torch.equal(output, expected)) + + def test_pad_2_dim_right(self): + x = torch.tensor([[1, 2], [3, 4]]) + y = torch.tensor([[5, 6]]) + output = pad((x, y), padding_value=0, padding_side="right") + expected = torch.tensor( + [ + [[1, 2], [3, 4]], + [[5, 6], [0, 0]], + ] + ) + self.assertTrue(torch.equal(output, expected)) + + def test_pad_2_dim_right_multidim(self): + x = torch.tensor([[1, 2], [3, 4]]) + y = torch.tensor([[5]]) + output = pad((x, y), padding_value=0, padding_side="right") + expected = torch.tensor( + [ + [[1, 2], [3, 4]], + [[5, 0], [0, 0]], + ] + ) + self.assertTrue(torch.equal(output, expected)) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index d959e7edff8..ec0065def35 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -296,6 +296,55 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: return batch + +def pad(tensors: List[torch.Tensor], padding_value: int = 0, padding_side: str = "right") -> torch.Tensor: + """ + Pads a list of tensors to the same shape along the first dimension. + + Args: + tensors (`List[torch.Tensor]`): + List of input tensors to pad. + padding_value (`int`): + Value to use for padding. Default is 0. + padding_side (`str`): + Side on which to add padding. Must be 'left' or 'right'. Default is 'right'. + + Returns: + `torch.Tensor`: + A single tensor containing the padded tensors. + + Examples: + >>> import torch + >>> pad([torch.tensor([1, 2, 3]), torch.tensor([4, 5])]) + tensor([[1, 2, 3], + [4, 5, 0]]) + >>> pad([torch.tensor([[1, 2], [3, 4]]), torch.tensor([[5, 6]])]) + tensor([[[1, 2], + [3, 4]], + + [[5, 6], + [0, 0]]]) + """ + # Determine the maximum shape for each dimension + output_shape = np.max([t.shape for t in tensors], 0).tolist() + + # Create an output tensor filled with the padding value + output = torch.full((len(tensors), *output_shape), padding_value, dtype=tensors[0].dtype, device=tensors[0].device) + + for i, t in enumerate(tensors): + # Determine the slice for the sequence dimension + if padding_side == "left": + seq_slice = slice(output_shape[0] - t.shape[0], output_shape[0]) + elif padding_side == "right": + seq_slice = slice(0, t.shape[0]) + else: + raise ValueError("padding_side must be 'left' or 'right'") + + slices = (seq_slice,) + tuple(slice(0, s) for s in t.shape[1:]) + output[i][slices] = t + + return output + @dataclass class DPODataCollatorWithPadding: r""" @@ -317,7 +366,7 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: # first, pad everything to the same length padded_batch = {} for k in features[0].keys(): - if k.endswith("_input_ids") or k.endswith("_attention_mask") or k.endswith("_labels"): + if k.endswith(("_input_ids", "_attention_mask", "_labels", "_pixel_values")): if self.is_encoder_decoder: to_pad = [torch.LongTensor(ex[k]) for ex in features] @@ -337,11 +386,7 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: raise ValueError(f"Unexpected key in batch '{k}'") padded_batch[k] = pad_sequence(to_pad, batch_first=True, padding_value=padding_value) else: - # adapted from https://stackoverflow.com/questions/73256206 - if "prompt" in k: - to_pad = [torch.LongTensor(ex[k][::-1]) for ex in features] - else: - to_pad = [torch.LongTensor(ex[k]) for ex in features] + # Set padding value based on the key if k.endswith("_input_ids"): if self.pad_token_id is None: raise ValueError( @@ -354,13 +399,26 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: padding_value = self.label_pad_token_id elif k.endswith("_attention_mask"): padding_value = 0 + elif k.endswith("_pixel_values"): + padding_value = 0 # TODO: check if this is correct else: raise ValueError(f"Unexpected key in batch '{k}'") - padded_batch[k] = pad_sequence(to_pad, batch_first=True, padding_value=padding_value) - # for the prompt, flip back so padding is on left side - if "prompt" in k: - padded_batch[k] = padded_batch[k].flip(dims=[1]) + # Set padding side based on the key + if k in ["prompt_input_ids", "prompt_attention_mask"]: + padding_side = "left" + else: + padding_side = "right" + + # Set the dtype + if k.endswith("_pixel_values"): + dtype = torch.float32 + else: + dtype = torch.int64 + + # Convert to tensor and pad + to_pad = [torch.tensor(ex[k], dtype=dtype) for ex in features] + padded_batch[k] = pad(to_pad, padding_value=padding_value, padding_side=padding_side) elif k.endswith("_logps"): # the cached reference model logprobs padded_batch[k] = torch.tensor([ex[k] for ex in features]) From aed1aebddd9b9247ea9915654ad50a7b2a06d46d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 30 May 2024 18:25:45 +0000 Subject: [PATCH 06/43] use prompt instead of tokenizer --- examples/scripts/vdpo.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index f0510e0d7b3..da9ddf9972e 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -130,6 +130,9 @@ model._ddp_params_and_buffers_to_ignore = [ name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool ] + processor.pad_token_id = tokenizer.pad_token_id + processor.bos_token_id = tokenizer.bos_token_id # needed for DPOTrainer + processor.eos_token_id = tokenizer.eos_token_id # needed for DPOTrainer ################ # Optional rich context managers @@ -150,6 +153,7 @@ ds[key] = ds[key].select(range(50)) def process(row): + row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False) row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) return row @@ -172,7 +176,7 @@ def process(row): args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, - tokenizer=tokenizer, + tokenizer=processor, peft_config=get_peft_config(model_config), callbacks=[RichProgressCallback] if TRL_USE_RICH else None, ) From e814f88f50e0abaf24056428dfb14c828d988f34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 30 May 2024 18:28:31 +0000 Subject: [PATCH 07/43] rm name main --- tests/test_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index e50e383db7f..cd8d7871f74 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -56,5 +56,3 @@ def test_pad_2_dim_right_multidim(self): ) self.assertTrue(torch.equal(output, expected)) -if __name__ == "__main__": - unittest.main() \ No newline at end of file From fd5d71b8801a4c4cc0af139477c8ae480a8327a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 30 May 2024 18:38:46 +0000 Subject: [PATCH 08/43] support vlm in tokenize row --- trl/trainer/dpo_trainer.py | 61 ++++++++++++++++++++++++++++---------- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 36ce79d3a4d..5aa57cf1b75 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -314,6 +314,12 @@ def make_inputs_require_grad(module, input, output): else: self.is_encoder_decoder = args.is_encoder_decoder + if model is not None: + self.is_vision_model = model.config.model_type in ["idefics2"] # TODO: find a better way to check if its a vision model + else: + warnings.warn("No model provided, cannot determine if it is a vision model. Setting is_vision_model to False.") + self.is_vision_model = False + self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) if model_adapter_name is not None: warnings.warn( @@ -489,9 +495,9 @@ def make_inputs_require_grad(module, input, output): # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): # tokenize the dataset - train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc) + train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, load_from_cache_file=False) if eval_dataset is not None: - eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc) + eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, load_from_cache_file=False) super().__init__( model=model, @@ -663,16 +669,22 @@ def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoa return super().get_eval_dataloader(eval_dataset=eval_dataset) - def build_tokenized_answer(self, prompt, answer): + def build_tokenized_answer(self, prompt, answer, images=None): """ Llama tokenizer does satisfy `enc(a + b) = enc(a) + enc(b)`. It does ensure `enc(a + b) = enc(a) + enc(a + b)[len(enc(a)):]`. Reference: https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 """ - - full_tokenized = self.tokenizer(prompt + answer, add_special_tokens=False) - prompt_input_ids = self.tokenizer(prompt, add_special_tokens=False)["input_ids"] + if self.is_vision_model: + if answer.count("") > 0: + raise NotImplementedError("Answer contains token, which is not supported yet.") + full_tokenized = self.tokenizer(prompt + answer, images=images, add_special_tokens=False) + full_tokenized = {k: v[0] for k, v in full_tokenized.items()} # Unbatch, not done when using idefics + prompt_input_ids = self.tokenizer(prompt, images=images, add_special_tokens=False)["input_ids"][0] + else: + full_tokenized = self.tokenizer(prompt + answer, add_special_tokens=False) + prompt_input_ids = self.tokenizer(prompt, add_special_tokens=False)["input_ids"] answer_input_ids = full_tokenized["input_ids"][len(prompt_input_ids) :] answer_attention_mask = full_tokenized["attention_mask"][len(prompt_input_ids) :] @@ -706,12 +718,23 @@ def build_tokenized_answer(self, prompt, answer): answer_input_ids = full_tokenized["input_ids"][response_token_ids_start_idx:] answer_attention_mask = full_tokenized["attention_mask"][response_token_ids_start_idx:] - return dict( - prompt_input_ids=prompt_input_ids, - prompt_attention_mask=prompt_attention_mask, - input_ids=answer_input_ids, - attention_mask=answer_attention_mask, - ) + if self.is_vision_model: + return dict( + prompt_input_ids=prompt_input_ids, + prompt_attention_mask=prompt_attention_mask, + prompt_pixel_values=full_tokenized["pixel_values"], + prompt_pixel_attention_mask=full_tokenized["pixel_attention_mask"], + input_ids=answer_input_ids, + attention_mask=answer_attention_mask, + ) + else: + return dict( + prompt_input_ids=prompt_input_ids, + prompt_attention_mask=prompt_attention_mask, + input_ids=answer_input_ids, + attention_mask=answer_attention_mask, + pixel_value=full_tokenized + ) def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module]] = None) -> Dict: """Tokenize a single row from a DPO specific dataset. @@ -728,6 +751,8 @@ def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module prompt = feature["prompt"] chosen = feature["chosen"] rejected = feature["rejected"] + if self.is_vision_model: + images = feature["images"] if not self.is_encoder_decoder: # Check issues below for more details @@ -737,16 +762,22 @@ def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module if not isinstance(prompt, str): raise ValueError(f"prompt should be an str but got {type(prompt)}") - prompt_tokens = self.tokenizer(prompt, add_special_tokens=False) + if self.is_vision_model: + prompt_tokens = self.tokenizer(prompt, images=images, add_special_tokens=False) + prompt_tokens = {k: v[0] for k, v in prompt_tokens.items()} # Unbatch, not done when using idefics + else: + prompt_tokens = self.tokenizer(prompt, add_special_tokens=False) + prompt_tokens = {f"prompt_{k}": v for k, v in prompt_tokens.items()} if not isinstance(chosen, str): raise ValueError(f"chosen should be an str but got {type(chosen)}") - chosen_tokens = self.build_tokenized_answer(prompt, chosen) + + chosen_tokens = self.build_tokenized_answer(prompt, chosen, images) if not isinstance(rejected, str): raise ValueError(f"rejected should be an str but got {type(rejected)}") - rejected_tokens = self.build_tokenized_answer(prompt, rejected) + rejected_tokens = self.build_tokenized_answer(prompt, rejected, images) # Last prompt token might get merged by tokenizer and # it should not be included for generation if that happens From e1b87552e2579ffdceae38a10e23e39e2ddb5c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 30 May 2024 18:39:32 +0000 Subject: [PATCH 09/43] temp fix for regex in lora_target_module --- trl/trainer/model_config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trl/trainer/model_config.py b/trl/trainer/model_config.py index b16a07421db..6bb50031ed8 100644 --- a/trl/trainer/model_config.py +++ b/trl/trainer/model_config.py @@ -86,5 +86,7 @@ def __post_init__(self): if self.load_in_8bit and self.load_in_4bit: raise ValueError("You can't use 8 bit and 4 bit precision at the same time") + # if self.lora_target_modules == ["all-linear"]: + # self.lora_target_modules = "all-linear" if isinstance(self.lora_target_modules, list) and len(self.lora_target_modules) == 1: self.lora_target_modules = self.lora_target_modules[0] From 8075419b7dbd788fa75494f32165cd56939d0121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 31 May 2024 13:54:38 +0000 Subject: [PATCH 10/43] format --- trl/trainer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index ec0065def35..8d9adf43370 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -312,7 +312,7 @@ def pad(tensors: List[torch.Tensor], padding_value: int = 0, padding_side: str = Returns: `torch.Tensor`: A single tensor containing the padded tensors. - + Examples: >>> import torch >>> pad([torch.tensor([1, 2, 3]), torch.tensor([4, 5])]) From 1b815c2888b2e0328ca05457e2f6a8672a6ff65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Fri, 31 May 2024 18:14:50 +0000 Subject: [PATCH 11/43] vdpo --- examples/scripts/vdpo.py | 57 +++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index da9ddf9972e..f5bcd326f2e 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -31,24 +31,22 @@ # peft: python examples/scripts/vdpo.py \ - --dataset_name=HuggingFaceH4/vqa_preferences \ - --model_name_or_path=HuggingFaceM4/idefics2-8b \ - --per_device_train_batch_size 4 \ - --learning_rate 1e-3 \ - --gradient_accumulation_steps 1 \ - --logging_steps 10 \ - --eval_steps 500 \ - --output_dir="dpo_anthropic_hh" \ - --optim rmsprop \ - --warmup_steps 150 \ + --dataset_name HuggingFaceH4/vqa_preferences \ + --model_name_or_path HuggingFaceM4/idefics2-8b \ + --per_device_train_batch_size 8 \ + --learning_rate 1e-5 \ + --gradient_accumulation_steps 8 \ + --logging_steps 5 \ + --output_dir dpo_idefics \ + --warmup_steps 10 \ --report_to wandb \ --bf16 \ + --torch_dtype bfloat16 \ --logging_first_step \ --no_remove_unused_columns \ --use_peft \ - --load_in_4bit \ + --dataloader_num_workers 8 --lora_target_module .*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$ - """ import logging @@ -59,7 +57,7 @@ TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser - +from accelerate import PartialState if TRL_USE_RICH: init_zero_verbose() FORMAT = "%(message)s" @@ -69,7 +67,7 @@ import torch from datasets import load_dataset -from transformers import Idefics2ForConditionalGeneration, AutoTokenizer, AutoProcessor +from transformers import AutoModelForVision2Seq, AutoProcessor from trl import ( DPOConfig, @@ -98,11 +96,7 @@ ################ # Model & Tokenizer ################ - torch_dtype = ( - model_config.torch_dtype - if model_config.torch_dtype in ["auto", None] - else getattr(torch, model_config.torch_dtype) - ) + torch_dtype = model_config.torch_dtype if model_config.torch_dtype in ["auto", None] else getattr(torch, model_config.torch_dtype) quantization_config = get_quantization_config(model_config) model_kwargs = dict( revision=model_config.model_revision, @@ -113,26 +107,26 @@ device_map=get_kbit_device_map() if quantization_config is not None else None, quantization_config=quantization_config, ) - model = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + model = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) peft_config = get_peft_config(model_config) if peft_config is None: - model_ref = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) + model_ref = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) else: model_ref = None - tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) + tokenizer = processor.tokenizer if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token if tokenizer.chat_template is None: tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}" if args.ignore_bias_buffers: # torch distributed hack - model._ddp_params_and_buffers_to_ignore = [ - name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool - ] + model._ddp_params_and_buffers_to_ignore = [name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool] + + # DPOTrainer needs the processor to have these attributes processor.pad_token_id = tokenizer.pad_token_id - processor.bos_token_id = tokenizer.bos_token_id # needed for DPOTrainer - processor.eos_token_id = tokenizer.eos_token_id # needed for DPOTrainer + processor.bos_token_id = tokenizer.bos_token_id + processor.eos_token_id = tokenizer.eos_token_id ################ # Optional rich context managers @@ -157,12 +151,9 @@ def process(row): row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) return row - - ds = ds.map( - process, - # num_proc=multiprocessing.cpu_count(), - load_from_cache_file=False, - ) + + with PartialState().local_main_process_first(): + ds = ds.map(process, num_proc=multiprocessing.cpu_count()) train_dataset = ds[args.dataset_train_split] eval_dataset = ds[args.dataset_test_split] From 6d6a1946c094fc31e58b16b018443360d1b69470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 3 Jun 2024 09:45:08 +0000 Subject: [PATCH 12/43] tmp float16 hard code --- trl/trainer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index 8d9adf43370..605e228e435 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -412,7 +412,7 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: # Set the dtype if k.endswith("_pixel_values"): - dtype = torch.float32 + dtype = torch.bfloat16 # TODO: tmp fix else: dtype = torch.int64 From 1935d3dee73b8ce5c794efc0e94b99c5c86f83e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 3 Jun 2024 09:45:40 +0000 Subject: [PATCH 13/43] concatenated_forward support for vision --- trl/trainer/dpo_trainer.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 5aa57cf1b75..4f67524d8e1 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -495,9 +495,9 @@ def make_inputs_require_grad(module, input, output): # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): # tokenize the dataset - train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, load_from_cache_file=False) + train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10) if eval_dataset is not None: - eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, load_from_cache_file=False) + eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10) super().__init__( model=model, @@ -935,6 +935,7 @@ def compute_reference_log_probs(self, padded_batch: Dict) -> Dict: def concatenated_inputs( batch: Dict[str, Union[List, torch.LongTensor]], is_encoder_decoder: bool = False, + is_vision_model: bool = False, label_pad_token_id: int = -100, padding_value: int = 0, device: Optional[torch.device] = None, @@ -991,6 +992,9 @@ def concatenated_inputs( batch["prompt_attention_mask"].repeat(2, 1).to(device=device) ) + if is_vision_model: + concatenated_batch["pixel_values"] = batch["prompt_pixel_values"].repeat(2, 1, 1, 1, 1).to(device=device) + concatenated_batch["pixel_attention_mask"] = batch["prompt_pixel_attention_mask"].repeat(2, 1, 1, 1).to(device=device) return concatenated_batch def dpo_loss( @@ -1147,20 +1151,23 @@ def concatenated_forward( concatenated_batch = self.concatenated_inputs( batch, is_encoder_decoder=self.is_encoder_decoder, + is_vision_model=self.is_vision_model, label_pad_token_id=self.label_pad_token_id, padding_value=self.padding_value, device=self.accelerator.device, ) len_chosen = batch["chosen_labels"].shape[0] - model_kwargs = ( - { - "labels": concatenated_batch["concatenated_labels"], - "decoder_input_ids": concatenated_batch.pop("concatenated_decoder_input_ids", None), - } - if self.is_encoder_decoder - else {} - ) + model_kwargs = {} + + if self.is_encoder_decoder: + model_kwargs["labels"] = concatenated_batch["concatenated_labels"] + model_kwargs["decoder_input_ids"] = concatenated_batch.pop("concatenated_decoder_input_ids", None) + + if self.is_vision_model: + model_kwargs["pixel_values"] = concatenated_batch["pixel_values"] + model_kwargs["pixel_attention_mask"] = concatenated_batch["pixel_attention_mask"] + all_logits = model( concatenated_batch["concatenated_input_ids"], attention_mask=concatenated_batch["concatenated_attention_mask"], From bdc2b955bc17452c753cdc47ab203692e0e1f5df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 17 Jun 2024 13:06:26 +0000 Subject: [PATCH 14/43] style and new command line --- examples/scripts/vdpo.py | 48 ++++++++++++++------------------------ tests/test_utils.py | 1 - trl/trainer/dpo_trainer.py | 16 +++++++++---- trl/trainer/utils.py | 2 +- 4 files changed, 30 insertions(+), 37 deletions(-) diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index f5bcd326f2e..19c2f7417d5 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -13,40 +13,20 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -# regular: # OOM error -python examples/scripts/vdpo.py \ - --dataset_name=HuggingFaceH4/vqa_preferences \ - --model_name_or_path=HuggingFaceM4/idefics2-8b \ - --per_device_train_batch_size 4 \ - --learning_rate 1e-3 \ - --gradient_accumulation_steps 1 \ - --logging_steps 10 \ - --eval_steps 500 \ - --output_dir="dpo_anthropic_hh" \ - --warmup_steps 150 \ - --report_to wandb \ - --bf16 \ - --logging_first_step \ - --no_remove_unused_columns - -# peft: -python examples/scripts/vdpo.py \ - --dataset_name HuggingFaceH4/vqa_preferences \ +accelerate launch examples/scripts/vdpo.py \ + --dataset_name HuggingFaceH4/rlaif-v_formatted \ --model_name_or_path HuggingFaceM4/idefics2-8b \ - --per_device_train_batch_size 8 \ + --per_device_train_batch_size 1 \ --learning_rate 1e-5 \ - --gradient_accumulation_steps 8 \ --logging_steps 5 \ - --output_dir dpo_idefics \ - --warmup_steps 10 \ - --report_to wandb \ + --output_dir dpo_idefics_rlaif-v \ --bf16 \ --torch_dtype bfloat16 \ --logging_first_step \ --no_remove_unused_columns \ + --sanity_check \ --use_peft \ - --dataloader_num_workers 8 - --lora_target_module .*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$ + --lora_target_modules=all-linear """ import logging @@ -58,6 +38,7 @@ from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser from accelerate import PartialState + if TRL_USE_RICH: init_zero_verbose() FORMAT = "%(message)s" @@ -96,8 +77,13 @@ ################ # Model & Tokenizer ################ - torch_dtype = model_config.torch_dtype if model_config.torch_dtype in ["auto", None] else getattr(torch, model_config.torch_dtype) + torch_dtype = ( + model_config.torch_dtype + if model_config.torch_dtype in ["auto", None] + else getattr(torch, model_config.torch_dtype) + ) quantization_config = get_quantization_config(model_config) + model_kwargs = dict( revision=model_config.model_revision, trust_remote_code=model_config.trust_remote_code, @@ -121,8 +107,10 @@ tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}" if args.ignore_bias_buffers: # torch distributed hack - model._ddp_params_and_buffers_to_ignore = [name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool] - + model._ddp_params_and_buffers_to_ignore = [ + name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool + ] + # DPOTrainer needs the processor to have these attributes processor.pad_token_id = tokenizer.pad_token_id processor.bos_token_id = tokenizer.bos_token_id @@ -151,7 +139,7 @@ def process(row): row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) return row - + with PartialState().local_main_process_first(): ds = ds.map(process, num_proc=multiprocessing.cpu_count()) train_dataset = ds[args.dataset_train_split] diff --git a/tests/test_utils.py b/tests/test_utils.py index cd8d7871f74..5e5c3ec9c9b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -55,4 +55,3 @@ def test_pad_2_dim_right_multidim(self): ] ) self.assertTrue(torch.equal(output, expected)) - diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 4f67524d8e1..5af7703d69e 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -315,9 +315,11 @@ def make_inputs_require_grad(module, input, output): self.is_encoder_decoder = args.is_encoder_decoder if model is not None: - self.is_vision_model = model.config.model_type in ["idefics2"] # TODO: find a better way to check if its a vision model + self.is_vision_model = model.config.model_type in ["idefics2"] # TODO: find a better way else: - warnings.warn("No model provided, cannot determine if it is a vision model. Setting is_vision_model to False.") + warnings.warn( + "No model provided, cannot determine if it is a vision model. Setting is_vision_model to False." + ) self.is_vision_model = False self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) @@ -497,7 +499,9 @@ def make_inputs_require_grad(module, input, output): # tokenize the dataset train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10) if eval_dataset is not None: - eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10) + eval_dataset = eval_dataset.map( + self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10 + ) super().__init__( model=model, @@ -733,7 +737,7 @@ def build_tokenized_answer(self, prompt, answer, images=None): prompt_attention_mask=prompt_attention_mask, input_ids=answer_input_ids, attention_mask=answer_attention_mask, - pixel_value=full_tokenized + pixel_value=full_tokenized, ) def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module]] = None) -> Dict: @@ -994,7 +998,9 @@ def concatenated_inputs( if is_vision_model: concatenated_batch["pixel_values"] = batch["prompt_pixel_values"].repeat(2, 1, 1, 1, 1).to(device=device) - concatenated_batch["pixel_attention_mask"] = batch["prompt_pixel_attention_mask"].repeat(2, 1, 1, 1).to(device=device) + concatenated_batch["pixel_attention_mask"] = ( + batch["prompt_pixel_attention_mask"].repeat(2, 1, 1, 1).to(device=device) + ) return concatenated_batch def dpo_loss( diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index 605e228e435..c2a25b0fb1f 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -296,7 +296,6 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: return batch - def pad(tensors: List[torch.Tensor], padding_value: int = 0, padding_side: str = "right") -> torch.Tensor: """ Pads a list of tensors to the same shape along the first dimension. @@ -345,6 +344,7 @@ def pad(tensors: List[torch.Tensor], padding_value: int = 0, padding_side: str = return output + @dataclass class DPODataCollatorWithPadding: r""" From 24b08f51bfe66005e1656ee9a0f9e8d1d0579617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Mon, 17 Jun 2024 13:13:04 +0000 Subject: [PATCH 15/43] all-linear --- trl/trainer/model_config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/trl/trainer/model_config.py b/trl/trainer/model_config.py index 6bb50031ed8..b16a07421db 100644 --- a/trl/trainer/model_config.py +++ b/trl/trainer/model_config.py @@ -86,7 +86,5 @@ def __post_init__(self): if self.load_in_8bit and self.load_in_4bit: raise ValueError("You can't use 8 bit and 4 bit precision at the same time") - # if self.lora_target_modules == ["all-linear"]: - # self.lora_target_modules = "all-linear" if isinstance(self.lora_target_modules, list) and len(self.lora_target_modules) == 1: self.lora_target_modules = self.lora_target_modules[0] From c5ff8d71eab03af332d8640b2902b9a3e07b0925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 07:52:54 +0000 Subject: [PATCH 16/43] format --- examples/scripts/sft_idefics.py | 14 ++++++++++++-- examples/scripts/vdpo.py | 10 +++++++++- examples/scripts/vsft_idefics2.py | 16 ++++++++++------ examples/scripts/vsft_llava.py | 12 +++++------- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/examples/scripts/sft_idefics.py b/examples/scripts/sft_idefics.py index 6a1d5eea575..e942d2e34db 100644 --- a/examples/scripts/sft_idefics.py +++ b/examples/scripts/sft_idefics.py @@ -9,16 +9,26 @@ import torch from datasets import load_dataset from peft import LoraConfig -from transformers import AutoProcessor, Idefics2ForConditionalGeneration, Trainer, TrainingArguments, BitsAndBytesConfig +from transformers import ( + AutoProcessor, + BitsAndBytesConfig, + Idefics2ForConditionalGeneration, + Trainer, + TrainingArguments, +) + from trl import get_kbit_device_map + USE_QLORA = True # QLora if __name__ == "__main__": # Load the model and processor model_name = "HuggingFaceM4/idefics2-8b" if USE_QLORA: - quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16) + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16 + ) else: quantization_config = None model = Idefics2ForConditionalGeneration.from_pretrained( diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index 19c2f7417d5..7f752ab4dde 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -17,14 +17,17 @@ --dataset_name HuggingFaceH4/rlaif-v_formatted \ --model_name_or_path HuggingFaceM4/idefics2-8b \ --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 16 \ --learning_rate 1e-5 \ --logging_steps 5 \ --output_dir dpo_idefics_rlaif-v \ + --push_to_hub --hub_model_id HuggingFaceH4/idefics2-8b-dpo-rlaif-v \ --bf16 \ --torch_dtype bfloat16 \ --logging_first_step \ --no_remove_unused_columns \ - --sanity_check \ + --dataset_num_proc 50 \ + --dataload_num_workers 16 \ --use_peft \ --lora_target_modules=all-linear """ @@ -138,6 +141,11 @@ def process(row): row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False) row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) + for idx, img in enumerate(row["images"]): # Resize image so that the largest side is 640 + ratio = min(1.0, 640 / max(img.size)) + new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) + row["images"][idx] = img.resize(new_size) + row["images"] = row["images"] return row with PartialState().local_main_process_first(): diff --git a/examples/scripts/vsft_idefics2.py b/examples/scripts/vsft_idefics2.py index f74867afc64..c5516ed6b94 100644 --- a/examples/scripts/vsft_idefics2.py +++ b/examples/scripts/vsft_idefics2.py @@ -15,19 +15,19 @@ """ # regular: python examples/scripts/vsft_idefics2.py \ - --dataset_name=HuggingFaceH4/cord-v2 \ --model_name_or_path=HuggingFaceM4/idefics2-8b \ + --dataset_name=HuggingFaceH4/cord-v2 \ --report_to=wandb \ - --learning_rate=1e-4 \ - --per_device_train_batch_size=2 \ - --gradient_accumulation_steps=8 \ + --learning_rate=2.0e-5 \ + --per_device_train_batch_size=8 \ + --gradient_accumulation_steps=1 \ --output_dir=data/vsft-idefics2 \ --logging_steps=5 \ --num_train_epochs=1 \ --push_to_hub \ --gradient_checkpointing \ --remove_unused_columns=False \ - --torch_dtype=float16 + --torch_dtype=bfloat16 # peft: @@ -115,7 +115,11 @@ ################ # IDEFICS2_CHAT_TEMPLATE = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}""" - torch_dtype = model_config.torch_dtype if model_config.torch_dtype in ["auto", None] else getattr(torch, model_config.torch_dtype) + torch_dtype = ( + model_config.torch_dtype + if model_config.torch_dtype in ["auto", None] + else getattr(torch, model_config.torch_dtype) + ) quantization_config = get_quantization_config(model_config) model_kwargs = dict( revision=model_config.model_revision, diff --git a/examples/scripts/vsft_llava.py b/examples/scripts/vsft_llava.py index 8a26477737d..59753927faf 100644 --- a/examples/scripts/vsft_llava.py +++ b/examples/scripts/vsft_llava.py @@ -18,9 +18,9 @@ --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ --model_name_or_path="llava-hf/llava-1.5-7b-hf" \ --report_to="wandb" \ - --learning_rate=1.4e-5 \ - --per_device_train_batch_size=8 \ - --gradient_accumulation_steps=1 \ + --learning_rate=1.4e-7 \ + --per_device_train_batch_size=2 \ + --gradient_accumulation_steps=32 \ --output_dir="data/vsft-llava-1.5-7b-hf" \ --logging_steps=5 \ --num_train_epochs=1 \ @@ -28,16 +28,15 @@ --gradient_checkpointing \ --remove_unused_columns=False \ --torch_dtype=float16 \ - --fp16=True # peft: python examples/scripts/vsft_llava.py \ - --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ + --dataset_name="HuggingFaceH4/llava-instruct-mix" \ --model_name_or_path="llava-hf/llava-1.5-7b-hf" \ --report_to="wandb" \ --learning_rate=1.4e-5 \ --per_device_train_batch_size=8 \ - --gradient_accumulation_steps=1 \ + --gradient_accumulation_steps=128 \ --output_dir="data/vsft-llava-1.5-7b-hf" \ --logging_steps=5 \ --num_train_epochs=1 \ @@ -45,7 +44,6 @@ --gradient_checkpointing \ --remove_unused_columns=False \ --torch_dtype=float16 \ - --fp16=True \ --use_peft=True \ --lora_r=64 \ --lora_alpha=16 \ From a7d17327df6d1b76d64f213ddd335be2407b0160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 07:55:21 +0000 Subject: [PATCH 17/43] delete old examples --- examples/scripts/sft_idefics.py | 123 ---------------- examples/scripts/vsft_idefics2.py | 230 ------------------------------ 2 files changed, 353 deletions(-) delete mode 100644 examples/scripts/sft_idefics.py delete mode 100644 examples/scripts/vsft_idefics2.py diff --git a/examples/scripts/sft_idefics.py b/examples/scripts/sft_idefics.py deleted file mode 100644 index e942d2e34db..00000000000 --- a/examples/scripts/sft_idefics.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -`CUDA_VISIBLE_DEVICES=1 python mre.py` works fine -without bnb: `CUDA_VISIBLE_DEVICES=1 python mre.py` doesn't work (diverges) -`accelerate launch mre.py` diverges - -Seems to be training without bnb that fails! -""" - -import torch -from datasets import load_dataset -from peft import LoraConfig -from transformers import ( - AutoProcessor, - BitsAndBytesConfig, - Idefics2ForConditionalGeneration, - Trainer, - TrainingArguments, -) - -from trl import get_kbit_device_map - - -USE_QLORA = True # QLora - -if __name__ == "__main__": - # Load the model and processor - model_name = "HuggingFaceM4/idefics2-8b" - if USE_QLORA: - quantization_config = BitsAndBytesConfig( - load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16 - ) - else: - quantization_config = None - model = Idefics2ForConditionalGeneration.from_pretrained( - model_name, - torch_dtype=torch.float16, - quantization_config=quantization_config, - device_map=get_kbit_device_map() if quantization_config is not None else None, - ) - lora_config = LoraConfig( - r=8, - lora_alpha=8, - lora_dropout=0.1, - target_modules=".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$", - init_lora_weights="gaussian", - use_dora=False if USE_QLORA else True, - ) - model.add_adapter(lora_config) - model.enable_adapters() - - processor = AutoProcessor.from_pretrained(model_name, do_image_splitting=False) - - # Load a dataset - dataset = load_dataset("HuggingFaceH4/llava-instruct-mix-vsft") - # dataset = load_dataset("HuggingFaceH4/cord-v2") - - # Process the dataset - def data_collator(examples, add_generation_prompt=False): - messages = [example["messages"] for example in examples] - images = [example["images"] for example in examples] - text = processor.apply_chat_template(messages, add_generation_prompt=add_generation_prompt) - batch = processor(text, images, return_tensors="pt", padding=True) - labels = batch["input_ids"].clone() - if processor.tokenizer.pad_token_id is not None: - image_token = processor.tokenizer("", add_special_tokens=False).input_ids[0] - labels[labels == processor.tokenizer.pad_token_id] = image_token - batch["labels"] = labels - return batch - - # Test before training - # example = dataset["test"][0] - # example["messages"] = example["messages"][:-1] # remove the last message (it's the answer) - # example["images"][0].save("image.jpg") - # inputs = data_collator([example], add_generation_prompt=True) - # exit_condition = processor.tokenizer("", add_special_tokens=False).input_ids - # bad_words_ids = processor.tokenizer(["", ""], add_special_tokens=False).input_ids - # generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, max_new_tokens=1000) - # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True) - # for i, t in enumerate(generated_text): - # print(f"{i}:\n{t}\n") - - trainer = Trainer( - model=model, - args=TrainingArguments( - output_dir="./results", - per_device_train_batch_size=1, - gradient_accumulation_steps=8, - logging_steps=10, - num_train_epochs=1, - logging_dir="./logs", - remove_unused_columns=False, - max_grad_norm=1.0, - ), - train_dataset=dataset["train"], - data_collator=data_collator, - ) - - trainer.train() - - # Save the model - model.save_pretrained("idefics2-8b-fst-llava-instruct-mix") - - # Test after training - # example = dataset["test"][0] - # example["messages"] = example["messages"][:-1] # remove the last message (it's the answer) - # inputs = data_collator([example], add_generation_prompt=True) - # exit_condition = processor.tokenizer("", add_special_tokens=False).input_ids - # bad_words_ids = processor.tokenizer(["", ""], add_special_tokens=False).input_ids - # generated_ids = model.generate(**inputs, eos_token_id=exit_condition, bad_words_ids=bad_words_ids, max_new_tokens=1000) - # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True) - # for i, t in enumerate(generated_text): - # print(f"{i}:\n{t}\n") - - -# accelerate launch python sft_idefics.py -# OK - -# Issues: - -# python mre.py -# TypeError: DynamicCache.__init__() takes 1 positional argument but 2 were given - -# python mre.py with LORA and no QLORA, diverges (all numbers of devices) diff --git a/examples/scripts/vsft_idefics2.py b/examples/scripts/vsft_idefics2.py deleted file mode 100644 index c5516ed6b94..00000000000 --- a/examples/scripts/vsft_idefics2.py +++ /dev/null @@ -1,230 +0,0 @@ -# flake8: noqa -# Copyright 2024 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -# regular: -python examples/scripts/vsft_idefics2.py \ - --model_name_or_path=HuggingFaceM4/idefics2-8b \ - --dataset_name=HuggingFaceH4/cord-v2 \ - --report_to=wandb \ - --learning_rate=2.0e-5 \ - --per_device_train_batch_size=8 \ - --gradient_accumulation_steps=1 \ - --output_dir=data/vsft-idefics2 \ - --logging_steps=5 \ - --num_train_epochs=1 \ - --push_to_hub \ - --gradient_checkpointing \ - --remove_unused_columns=False \ - --torch_dtype=bfloat16 - - -# peft: -python examples/scripts/vsft_idefics2.py \ - --model_name_or_path="HuggingFaceM4/idefics2-tfrm-compatible" \ - --report_to="wandb" \ - --learning_rate=1.4e-5 \ - --per_device_train_batch_size=8 \ - --gradient_accumulation_steps=1 \ - --output_dir="data/vsft-llava-1.5-7b-hf" \ - --logging_steps=5 \ - --num_train_epochs=1 \ - --push_to_hub \ - --gradient_checkpointing \ - --remove_unused_columns=False \ - --torch_dtype=float16 \ - --fp16=True \ - --dataset_name=HuggingFaceH4/llava-instruct-mix-vsft \ - --use_peft=True \ - --lora_r=64 \ - --lora_alpha=16 \ - --lora_target_modules=all-linear" - -# evaluation: - -To evaluate, first install the lmms-eval framework: pip install git+https://github.com/EvolvingLMMs-Lab/lmms-eval.git -then run: -accelerate launch --num_processes=8 -m lmms_eval \ - --model llava_hf \ - --model_args pretrained=llava-hf/llava-1.5-7b-hf \ - --tasks mmbench \ - --batch_size 1 \ - --output_path ./logs/ \ - --log_sample -""" -import logging -import os -from contextlib import nullcontext - -TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) - -from trl.commands.cli_utils import init_zero_verbose, TrlParser, SFTScriptArguments - -if TRL_USE_RICH: - init_zero_verbose() - FORMAT = "%(message)s" - - from rich.console import Console - from rich.logging import RichHandler - -import torch -from accelerate import Accelerator -from datasets import load_dataset - -from tqdm.rich import tqdm -from transformers import AutoTokenizer, AutoProcessor, Idefics2ForConditionalGeneration - -from trl import ( - ModelConfig, - RichProgressCallback, - SFTTrainer, - SFTConfig, - get_peft_config, - get_quantization_config, - get_kbit_device_map, -) - -tqdm.pandas() - -if TRL_USE_RICH: - logging.basicConfig(format=FORMAT, datefmt="[%X]", handlers=[RichHandler()], level=logging.INFO) - - -if __name__ == "__main__": - parser = TrlParser((SFTScriptArguments, SFTConfig, ModelConfig)) - args, training_args, model_config = parser.parse_args_and_config() - training_args.gradient_checkpointing_kwargs = dict(use_reentrant=True) - # Force use our print callback - if TRL_USE_RICH: - training_args.disable_tqdm = True - console = Console() - - ################ - # Model, Tokenizer & Processor - ################ - # IDEFICS2_CHAT_TEMPLATE = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}""" - - torch_dtype = ( - model_config.torch_dtype - if model_config.torch_dtype in ["auto", None] - else getattr(torch, model_config.torch_dtype) - ) - quantization_config = get_quantization_config(model_config) - model_kwargs = dict( - revision=model_config.model_revision, - trust_remote_code=model_config.trust_remote_code, - attn_implementation=model_config.attn_implementation, - torch_dtype=torch_dtype, - use_cache=False, - device_map=get_kbit_device_map() if quantization_config is not None else None, - quantization_config=quantization_config, - ) - # tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path, use_fast=True) - # tokenizer.chat_template = IDEFICS2_CHAT_TEMPLATE - processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) - # processor.tokenizer = tokenizer - - # model = Idefics2ForConditionalGeneration.from_pretrained(model_config.model_name_or_path, **model_kwargs) - - ###### - from transformers import AutoProcessor, BitsAndBytesConfig, Idefics2ForConditionalGeneration - from peft import LoraConfig - - bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16) - model = Idefics2ForConditionalGeneration.from_pretrained( - "HuggingFaceM4/idefics2-8b", torch_dtype=torch.float16, quantization_config=bnb_config - ) - lora_config = LoraConfig( - r=8, - lora_alpha=8, - lora_dropout=0.1, - target_modules=".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$", - use_dora=False, - init_lora_weights="gaussian", - ) - model.add_adapter(lora_config) - model.enable_adapters() - ###### - - ################ - # Create a data collator to encode text and image pairs - ################ - - class Idefics2DataCollator: - def __init__(self, processor): - self.processor = processor - - def __call__(self, examples): - texts = [] - images = [] - for example in examples: - if len(example["images"]) > 1: - raise ValueError("This collator only supports one image per example") - messages = example["messages"] - text = self.processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) - texts.append(text) - images.append([example["images"][0]]) - - batch = self.processor(texts, images, return_tensors="pt", padding=True) - - labels = batch["input_ids"].clone() - if self.processor.tokenizer.pad_token_id is not None: - labels[labels == self.processor.tokenizer.pad_token_id] = -100 - batch["labels"] = labels - - return batch - - data_collator = Idefics2DataCollator(processor) - - ################ - # Dataset - ################ - raw_datasets = load_dataset(args.dataset_name) - train_dataset = raw_datasets["train"] - eval_dataset = raw_datasets["test"] - - ################ - # Optional rich context managers - ############### - init_context = nullcontext() if not TRL_USE_RICH else console.status("[bold green]Initializing the SFTTrainer...") - save_context = ( - nullcontext() - if not TRL_USE_RICH - else console.status(f"[bold green]Training completed! Saving the model to {training_args.output_dir}") - ) - - ################ - # Training - ################ - with init_context: - trainer = SFTTrainer( - model=model, - args=training_args, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - dataset_text_field="text", # need a dummy field - tokenizer=processor.tokenizer, - # peft_config=get_peft_config(model_config), - callbacks=[RichProgressCallback] if TRL_USE_RICH else None, - data_collator=data_collator, - dataset_kwargs={"skip_prepare_dataset": True}, - ) - - trainer.train() - - with save_context: - trainer.save_model(training_args.output_dir) - trainer.push_to_hub() - if Accelerator().is_main_process: - processor.push_to_hub(training_args.hub_model_id) From 2303c40364feb41f14845ba5885d3b7338b6a0ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 09:19:51 +0000 Subject: [PATCH 18/43] get image --- trl/trainer/dpo_trainer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 5af7703d69e..05611ac66ce 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -755,8 +755,7 @@ def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module prompt = feature["prompt"] chosen = feature["chosen"] rejected = feature["rejected"] - if self.is_vision_model: - images = feature["images"] + images = feature.get("images") if not self.is_encoder_decoder: # Check issues below for more details From b606190082f9ac8e7fe3973468707d5cd8fae742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 09:25:11 +0000 Subject: [PATCH 19/43] upcast --- trl/trainer/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py index c2a25b0fb1f..ef68ee57a60 100644 --- a/trl/trainer/utils.py +++ b/trl/trainer/utils.py @@ -412,7 +412,7 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]: # Set the dtype if k.endswith("_pixel_values"): - dtype = torch.bfloat16 # TODO: tmp fix + dtype = torch.float32 # will be downcasted if necessary by the Trainer else: dtype = torch.int64 From 4f78ee57fc67062185f15e36240b568c4d94a49f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 17:07:59 +0000 Subject: [PATCH 20/43] new test --- tests/my_new_test.py | 107 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 tests/my_new_test.py diff --git a/tests/my_new_test.py b/tests/my_new_test.py new file mode 100644 index 00000000000..24c31267d83 --- /dev/null +++ b/tests/my_new_test.py @@ -0,0 +1,107 @@ +import torch +from datasets import Dataset +from transformers import AutoModelForVision2Seq, AutoProcessor +from PIL import Image +from trl import DPOConfig, DPOTrainer +import datasets + + +# Get the model +model_id = "trl-internal-testing/tiny-random-idefics2" +model = AutoModelForVision2Seq.from_pretrained(model_id) +ref_model = AutoModelForVision2Seq.from_pretrained(model_id) +processor = AutoProcessor.from_pretrained(model_id) + +# Get the training args +training_args = DPOConfig( + output_dir=".", + per_device_train_batch_size=2, + max_steps=3, + remove_unused_columns=False, + gradient_accumulation_steps=1, + learning_rate=9e-1, + evaluation_strategy="steps", + beta=0.1, + loss_type="sigmoid", + precompute_ref_log_probs=True, +) + +dummy_dataset_dict = { + "images": [ + [Image.new("RGB", (100, 100), color="black")], + [Image.new("RGB", (133, 100), color="red")], + [Image.new("RGB", (100, 133), color="green")], + [Image.new("RGB", (133, 133), color="blue")], + [Image.new("RGB", (200, 50), color="yellow")], + [Image.new("RGB", (50, 200), color="magenta")], + [Image.new("RGB", (200, 200), color="cyan")], + # [Image.new("RGB", (50, 50), color="white")], + # [Image.new("RGB", (100, 100), color="orange")], + ], + "prompt": [ + " hello", + " how are you", + " What is your name?", + " What is your name?", + " Which is the best programming language?", + " Which is the best programming language?", + " Which is the best programming language?", + # "[INST] How is the stock price? [/INST]", + # "[INST] How is the stock price? [/INST] ", + ], + "chosen": [ + "hi nice to meet you", + "I am fine", + "My name is Mary", + "My name is Mary", + "Python", + "Python", + "Python", + # "$46 as of 10am EST", + # "46 as of 10am EST", + ], + "rejected": [ + "leave me alone", + "I am not fine", + "Whats it to you?", + "I dont have a name", + "Javascript", + "C++", + "Java", + # " $46 as of 10am EST", + # " 46 as of 10am EST", + ], +} + +features = datasets.Features( + { + "images": datasets.Sequence(datasets.Image(decode=True)), # datasets still handles badly sequence of images + "prompt": datasets.Value("string"), + "chosen": datasets.Value("string"), + "rejected": datasets.Value("string"), + } +) +dataset = Dataset.from_dict(dummy_dataset_dict, features=features) + + +trainer = DPOTrainer( + model=model, + ref_model=ref_model, + args=training_args, + tokenizer=processor, + train_dataset=dataset, + eval_dataset=dataset, +) + +previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} + +trainer.train() + +assert trainer.state.log_history[-1]["train_loss"] is not None + +# check the params have changed +for n, param in previous_trainable_params.items(): + new_param = trainer.model.get_parameter(n) + # check the params have changed - ignore 0 biases + if param.sum() != 0: + assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) From c4433c0fdda5485fd0dfcd854eb2bdbfdc228184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 17:09:41 +0000 Subject: [PATCH 21/43] modified test --- tests/test_dpo_trainer.py | 113 +++++++++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 2 deletions(-) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index a2aac261f51..f00e2cf2a98 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -18,8 +18,8 @@ from datasets import Dataset from parameterized import parameterized from pytest import mark -from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer - +from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForVision2Seq, AutoProcessor +from PIL import Image from trl import DPOConfig, DPOTrainer from .testing_utils import require_bitsandbytes, require_no_wandb, require_peft @@ -40,6 +40,12 @@ def setUpClass(cls): cls.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) cls.t5_tokenizer = AutoTokenizer.from_pretrained(model_id) + # get idefics2 model + model_id = "trl-internal-testing/tiny-random-idefics2" + cls.idefics2_model = AutoModelForVision2Seq.from_pretrained(model_id) + cls.idefics2_ref_model = AutoModelForVision2Seq.from_pretrained(model_id) + cls.idefics2_processor = AutoProcessor.from_pretrained(model_id) + def _init_dummy_dataset(self): # fmt: off dummy_dataset_dict = { @@ -80,6 +86,57 @@ def _init_dummy_dataset(self): # fmt: on return Dataset.from_dict(dummy_dataset_dict) + def _init_dummy_image_dataset(self): + # fmt: off + dummy_dataset_dict = { + "images": [ + [Image.new("RGB", (100, 100), color="black")], + [Image.new("RGB", (133, 100), color="red")], + [Image.new("RGB", (100, 133), color="green")], + [Image.new("RGB", (133, 133), color="blue")], + [Image.new("RGB", (200, 50), color="yellow")], + [Image.new("RGB", (50, 200), color="magenta")], + [Image.new("RGB", (200, 200), color="cyan")], + [Image.new("RGB", (50, 50), color="white")], + [Image.new("RGB", (100, 100), color="orange")], + ], + "prompt": [ + "hello", + "how are you", + "What is your name?", + "What is your name?", + "Which is the best programming language?", + "Which is the best programming language?", + "Which is the best programming language?", + "[INST] How is the stock price? [/INST]", + "[INST] How is the stock price? [/INST] ", + ], + "chosen": [ + "hi nice to meet you", + "I am fine", + "My name is Mary", + "My name is Mary", + "Python", + "Python", + "Python", + "$46 as of 10am EST", + "46 as of 10am EST", + ], + "rejected": [ + "leave me alone", + "I am not fine", + "Whats it to you?", + "I dont have a name", + "Javascript", + "C++", + "Java", + " $46 as of 10am EST", + " 46 as of 10am EST", + ], + } + # fmt: on + return Dataset.from_dict(dummy_dataset_dict) + @parameterized.expand( [ ["gpt2", "sigmoid", True], @@ -147,6 +204,58 @@ def test_dpo_trainer(self, name, loss_type, pre_compute): if param.sum() != 0: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + + @parameterized.expand( + [ + ["sigmoid", True], + ] + ) + def test_vdpo_trainer(self, loss_type, pre_compute): + with tempfile.TemporaryDirectory() as tmp_dir: + training_args = DPOConfig( + output_dir=tmp_dir, + per_device_train_batch_size=2, + max_steps=3, + remove_unused_columns=False, + gradient_accumulation_steps=1, + learning_rate=9e-1, + evaluation_strategy="steps", + beta=0.1, + loss_type=loss_type, + precompute_ref_log_probs=pre_compute, + ) + + dummy_dataset = self._init_dummy_image_dataset() + + model = self.idefics2_model + ref_model = self.idefics2_ref_model + processor = self.idefics2_processor + + processor.pad_token_id = processor.tokenizer.pad_token_id + + trainer = DPOTrainer( + model=model, + ref_model=ref_model, + args=training_args, + tokenizer=processor, + train_dataset=dummy_dataset, + eval_dataset=dummy_dataset, + ) + + previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} + + trainer.train() + + assert trainer.state.log_history[-1]["train_loss"] is not None + + # check the params have changed + for n, param in previous_trainable_params.items(): + new_param = trainer.model.get_parameter(n) + # check the params have changed - ignore 0 biases + if param.sum() != 0: + assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) + + def test_dpo_trainer_without_providing_ref_model(self): with tempfile.TemporaryDirectory() as tmp_dir: training_args = DPOConfig( From 7a8a94fafe1adfa4a34637cd293e8ee1c82b487b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 18 Jun 2024 17:12:14 +0000 Subject: [PATCH 22/43] new strat for tokenizer --- trl/trainer/dpo_trainer.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 05611ac66ce..ba1c002e63a 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -321,6 +321,12 @@ def make_inputs_require_grad(module, input, output): "No model provided, cannot determine if it is a vision model. Setting is_vision_model to False." ) self.is_vision_model = False + + if self.is_vision_model: + self.processor = tokenizer + self.tokenizer = tokenizer.tokenizer # tokenizer is actually a processor at this point + else: + self.tokenizer = tokenizer self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) if model_adapter_name is not None: @@ -407,7 +413,7 @@ def make_inputs_require_grad(module, input, output): args.label_pad_token_id = label_pad_token_id if data_collator is None: data_collator = DPODataCollatorWithPadding( - pad_token_id=tokenizer.pad_token_id, + pad_token_id=self.tokenizer.pad_token_id, label_pad_token_id=args.label_pad_token_id, is_encoder_decoder=self.is_encoder_decoder, ) @@ -443,7 +449,7 @@ def make_inputs_require_grad(module, input, output): "You passed `padding_value` to the DPOTrainer, the value you passed will override the one in the `DPOConfig`." ) args.padding_value = padding_value - self.padding_value = args.padding_value if padding_value is not None else tokenizer.pad_token_id + self.padding_value = args.padding_value if padding_value is not None else self.tokenizer.pad_token_id self.max_prompt_length = args.max_prompt_length if truncation_mode != "keep_end": warnings.warn( @@ -452,7 +458,6 @@ def make_inputs_require_grad(module, input, output): args.truncation_mode = truncation_mode self.truncation_mode = args.truncation_mode self.max_target_length = args.max_target_length - self.tokenizer = tokenizer self.precompute_ref_log_probs = args.precompute_ref_log_probs # Since ref_logs are precomputed on the first call to get_train/eval_dataloader @@ -683,9 +688,9 @@ def build_tokenized_answer(self, prompt, answer, images=None): if self.is_vision_model: if answer.count("") > 0: raise NotImplementedError("Answer contains token, which is not supported yet.") - full_tokenized = self.tokenizer(prompt + answer, images=images, add_special_tokens=False) + full_tokenized = self.processor(prompt + answer, images=images, add_special_tokens=False) full_tokenized = {k: v[0] for k, v in full_tokenized.items()} # Unbatch, not done when using idefics - prompt_input_ids = self.tokenizer(prompt, images=images, add_special_tokens=False)["input_ids"][0] + prompt_input_ids = self.processor(prompt, images=images, add_special_tokens=False)["input_ids"][0] else: full_tokenized = self.tokenizer(prompt + answer, add_special_tokens=False) prompt_input_ids = self.tokenizer(prompt, add_special_tokens=False)["input_ids"] @@ -766,7 +771,7 @@ def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module if not isinstance(prompt, str): raise ValueError(f"prompt should be an str but got {type(prompt)}") if self.is_vision_model: - prompt_tokens = self.tokenizer(prompt, images=images, add_special_tokens=False) + prompt_tokens = self.processor(prompt, images=images, add_special_tokens=False) prompt_tokens = {k: v[0] for k, v in prompt_tokens.items()} # Unbatch, not done when using idefics else: prompt_tokens = self.tokenizer(prompt, add_special_tokens=False) From 995571058b1eaf1592ef1bc74c0890756979b785 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 10:27:11 +0000 Subject: [PATCH 23/43] rm token transfer --- examples/scripts/vdpo.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index 7f752ab4dde..2d3fa4d3eba 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -114,11 +114,6 @@ name for name, buffer in model.named_buffers() if buffer.dtype == torch.bool ] - # DPOTrainer needs the processor to have these attributes - processor.pad_token_id = tokenizer.pad_token_id - processor.bos_token_id = tokenizer.bos_token_id - processor.eos_token_id = tokenizer.eos_token_id - ################ # Optional rich context managers ############### @@ -169,6 +164,6 @@ def process(row): ) trainer.train() - + trainer.push_to_hub with save_context: trainer.save_model(training_args.output_dir) From f6ee370f1016bb6118cc8bfca8a8982ca6fa98ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 10:52:45 +0000 Subject: [PATCH 24/43] integrate vision in dpo example --- examples/scripts/dpo.py | 55 +++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 56a11d9dc3e..1094de950b1 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -48,6 +48,19 @@ --use_peft \ --lora_r=16 \ --lora_alpha=16 + +# vision with peft: +accelerate launch examples/scripts/dpo.py \ + --dataset_name HuggingFaceH4/rlaif-v_formatted \ + --model_name_or_path HuggingFaceM4/idefics2-8b \ + --output_dir dpo_idefics_rlaif-v \ + --per_device_train_batch_size 1 \ + --gradient_accumulation_steps 16 \ + --learning_rate 1e-5 \ + --bf16 \ + --torch_dtype bfloat16 \ + --use_peft \ + --lora_target_modules=all-linear """ import logging @@ -58,6 +71,7 @@ TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser +from accelerate import PartialState if TRL_USE_RICH: init_zero_verbose() @@ -68,7 +82,7 @@ import torch from datasets import load_dataset -from transformers import AutoModelForCausalLM, AutoTokenizer +from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForVision2Seq, AutoProcessor from trl import ( DPOConfig, @@ -112,13 +126,25 @@ device_map=get_kbit_device_map() if quantization_config is not None else None, quantization_config=quantization_config, ) - model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) + is_vision_model = model_config.model_name_or_path in ['HuggingFaceM4/idefics2-8b'] + if is_vision_model: + model = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) + else: + model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) peft_config = get_peft_config(model_config) if peft_config is None: - model_ref = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) + if is_vision_model: + model_ref = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) + else: + model_ref = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) else: model_ref = None - tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) + if is_vision_model: + processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=True) + tokenizer = processor.tokenizer + else: + tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) + processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token if tokenizer.chat_template is None: @@ -148,16 +174,19 @@ ds[key] = ds[key].select(range(50)) def process(row): - row["prompt"] = tokenizer.apply_chat_template(row["chosen"][:-1], tokenize=False) - row["chosen"] = tokenizer.apply_chat_template([row["chosen"][-1]], tokenize=False) - row["rejected"] = tokenizer.apply_chat_template([row["rejected"][-1]], tokenize=False) + row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) + row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) + if "images" in row: + for idx, img in enumerate(row["images"]): # Resize image so that the largest side is 640 + ratio = min(1.0, 640 / max(img.size)) + new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) + row["images"][idx] = img.resize(new_size) + row["images"] = row["images"] return row - ds = ds.map( - process, - num_proc=multiprocessing.cpu_count(), - load_from_cache_file=False, - ) + with PartialState().local_main_process_first(): + ds = ds.map(process, num_proc=multiprocessing.cpu_count()) train_dataset = ds[args.dataset_train_split] eval_dataset = ds[args.dataset_test_split] @@ -171,7 +200,7 @@ def process(row): args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, - tokenizer=tokenizer, + tokenizer=processor if is_vision_model else tokenizer, peft_config=get_peft_config(model_config), callbacks=[RichProgressCallback] if TRL_USE_RICH else None, ) From 56fb036e4900505f38264d2630a1b4128e4c8a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 10:53:35 +0000 Subject: [PATCH 25/43] format --- examples/scripts/dpo.py | 2 +- tests/test_dpo_trainer.py | 15 ++++++++++----- trl/trainer/dpo_trainer.py | 4 ++-- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 1094de950b1..b845187e295 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -126,7 +126,7 @@ device_map=get_kbit_device_map() if quantization_config is not None else None, quantization_config=quantization_config, ) - is_vision_model = model_config.model_name_or_path in ['HuggingFaceM4/idefics2-8b'] + is_vision_model = model_config.model_name_or_path in ["HuggingFaceM4/idefics2-8b"] if is_vision_model: model = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) else: diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index 5c1ded8875e..b151c4fa8b8 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -17,10 +17,17 @@ import torch from datasets import Dataset from parameterized import parameterized -from pytest import mark -from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForVision2Seq, AutoProcessor from PIL import Image -from trl import DPOConfig, DPOTrainer, FDivergenceType +from pytest import mark +from transformers import ( + AutoModelForCausalLM, + AutoModelForSeq2SeqLM, + AutoModelForVision2Seq, + AutoProcessor, + AutoTokenizer, +) + +from trl import DPOConfig, DPOTrainer from .testing_utils import require_bitsandbytes, require_no_wandb, require_peft @@ -207,7 +214,6 @@ def test_dpo_trainer(self, name, loss_type, pre_compute): if param.sum() != 0: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - @parameterized.expand( [ ["sigmoid", True], @@ -258,7 +264,6 @@ def test_vdpo_trainer(self, loss_type, pre_compute): if param.sum() != 0: assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) - def test_dpo_trainer_without_providing_ref_model(self): with tempfile.TemporaryDirectory() as tmp_dir: training_args = DPOConfig( diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 147bb965963..d2fa213db0f 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -323,10 +323,10 @@ def make_inputs_require_grad(module, input, output): "No model provided, cannot determine if it is a vision model. Setting is_vision_model to False." ) self.is_vision_model = False - + if self.is_vision_model: self.processor = tokenizer - self.tokenizer = tokenizer.tokenizer # tokenizer is actually a processor at this point + self.tokenizer = tokenizer.tokenizer # tokenizer is actually a processor at this point else: self.tokenizer = tokenizer From c3249e52460de9930e9b3b5391786c1e6b24f0af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 12:38:47 +0000 Subject: [PATCH 26/43] add FDivergenceType back --- tests/test_dpo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index b151c4fa8b8..aefc6b5ba30 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -27,7 +27,7 @@ AutoTokenizer, ) -from trl import DPOConfig, DPOTrainer +from trl import DPOConfig, DPOTrainer, FDivergenceType from .testing_utils import require_bitsandbytes, require_no_wandb, require_peft From f69bb1c37ebeea825ce20c5d1976de9937eaf185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 12:43:00 +0000 Subject: [PATCH 27/43] precommit --- tests/my_new_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/my_new_test.py b/tests/my_new_test.py index 24c31267d83..14eef6fe2c2 100644 --- a/tests/my_new_test.py +++ b/tests/my_new_test.py @@ -1,9 +1,10 @@ +import datasets import torch from datasets import Dataset -from transformers import AutoModelForVision2Seq, AutoProcessor from PIL import Image +from transformers import AutoModelForVision2Seq, AutoProcessor + from trl import DPOConfig, DPOTrainer -import datasets # Get the model From 6d859cf997d87cd4b3ed076bdf919695c04e42a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 12:56:31 +0000 Subject: [PATCH 28/43] pillow test dep --- setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 65ae9293d57..7180babb93d 100644 --- a/setup.py +++ b/setup.py @@ -69,7 +69,16 @@ "tyro>=0.5.11", ] EXTRAS = { - "test": ["parameterized", "pytest", "pytest-xdist", "accelerate", "pytest-cov", "pytest-xdist", "scikit-learn"], + "test": [ + "parameterized", + "pytest", + "pytest-xdist", + "accelerate", + "pytest-cov", + "pytest-xdist", + "scikit-learn", + "Pillow", + ], "peft": ["peft>=0.4.0"], "diffusers": ["diffusers>=0.18.0"], "deepspeed": ["deepspeed>=0.9.5"], From 48db3e17a19a47b9397159600ec6ea644998c16d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 13:04:38 +0000 Subject: [PATCH 29/43] optional prompt --- examples/scripts/dpo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index b845187e295..27ebb8a4df1 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -174,7 +174,8 @@ ds[key] = ds[key].select(range(50)) def process(row): - row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + if "prompt" in row: + row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) if "images" in row: From dea765b6e012d602dcea252484a9e98ff2704ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 13:41:41 +0000 Subject: [PATCH 30/43] `evaluation_strategy` to `eval_strategy` --- tests/my_new_test.py | 2 +- tests/test_dpo_trainer.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/my_new_test.py b/tests/my_new_test.py index 14eef6fe2c2..5a6ae35d354 100644 --- a/tests/my_new_test.py +++ b/tests/my_new_test.py @@ -21,7 +21,7 @@ remove_unused_columns=False, gradient_accumulation_steps=1, learning_rate=9e-1, - evaluation_strategy="steps", + eval_strategy="steps", beta=0.1, loss_type="sigmoid", precompute_ref_log_probs=True, diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index aefc6b5ba30..c531d5685ea 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -228,7 +228,7 @@ def test_vdpo_trainer(self, loss_type, pre_compute): remove_unused_columns=False, gradient_accumulation_steps=1, learning_rate=9e-1, - evaluation_strategy="steps", + eval_strategy="steps", beta=0.1, loss_type=loss_type, precompute_ref_log_probs=pre_compute, @@ -855,7 +855,7 @@ def test_dpo_loss_alpha_div_f(self): remove_unused_columns=False, gradient_accumulation_steps=4, learning_rate=9e-1, - evaluation_strategy="steps", + eval_strategy="steps", f_divergence_type=FDivergenceType.ALPHA_DIVERGENCE.value, f_alpha_divergence_coef=0.5, ) @@ -897,7 +897,7 @@ def test_dpo_loss_js_div_f(self): remove_unused_columns=False, gradient_accumulation_steps=4, learning_rate=9e-1, - evaluation_strategy="steps", + eval_strategy="steps", f_divergence_type=FDivergenceType.JS_DIVERGENCE.value, f_alpha_divergence_coef=0.5, ) From d6dc3ba7e91f97f7a5e63c2293bf173e5efd9ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 13:43:58 +0000 Subject: [PATCH 31/43] revert vsft change (oos) --- examples/scripts/vsft_llava.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/scripts/vsft_llava.py b/examples/scripts/vsft_llava.py index 59753927faf..85cb98d5f3c 100644 --- a/examples/scripts/vsft_llava.py +++ b/examples/scripts/vsft_llava.py @@ -18,9 +18,9 @@ --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ --model_name_or_path="llava-hf/llava-1.5-7b-hf" \ --report_to="wandb" \ - --learning_rate=1.4e-7 \ - --per_device_train_batch_size=2 \ - --gradient_accumulation_steps=32 \ + --learning_rate=1.4e-5 \ + --per_device_train_batch_size=8 \ + --gradient_accumulation_steps=1 \ --output_dir="data/vsft-llava-1.5-7b-hf" \ --logging_steps=5 \ --num_train_epochs=1 \ @@ -28,15 +28,16 @@ --gradient_checkpointing \ --remove_unused_columns=False \ --torch_dtype=float16 \ + --fp16=True # peft: python examples/scripts/vsft_llava.py \ - --dataset_name="HuggingFaceH4/llava-instruct-mix" \ + --dataset_name="HuggingFaceH4/llava-instruct-mix-vsft" \ --model_name_or_path="llava-hf/llava-1.5-7b-hf" \ --report_to="wandb" \ --learning_rate=1.4e-5 \ --per_device_train_batch_size=8 \ - --gradient_accumulation_steps=128 \ + --gradient_accumulation_steps=1 \ --output_dir="data/vsft-llava-1.5-7b-hf" \ --logging_steps=5 \ --num_train_epochs=1 \ @@ -44,10 +45,11 @@ --gradient_checkpointing \ --remove_unused_columns=False \ --torch_dtype=float16 \ + --fp16=True \ --use_peft=True \ --lora_r=64 \ --lora_alpha=16 \ - --lora_target_modules=all-linear + --lora_target_modules=all-linear" # evaluation: From 3a1f5b8e1fd07e4882958df766be24222346d3c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 14:59:05 +0000 Subject: [PATCH 32/43] update test --- tests/test_dpo_trainer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index c531d5685ea..ff742eb5832 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -15,7 +15,7 @@ import unittest import torch -from datasets import Dataset +from datasets import Dataset, features from parameterized import parameterized from PIL import Image from pytest import mark @@ -142,7 +142,15 @@ def _init_dummy_image_dataset(self): ], } # fmt: on - return Dataset.from_dict(dummy_dataset_dict) + f = features.Features( + { + "images": features.Sequence(features.Image(decode=True)), # datasets handles badly sequence of images + "prompt": features.Value("string"), + "chosen": features.Value("string"), + "rejected": features.Value("string"), + } + ) + return Dataset.from_dict(dummy_dataset_dict, features=f) @parameterized.expand( [ From 5545825bc9f619f2c5719c167182fd317a0b593a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Tue, 25 Jun 2024 21:53:53 +0000 Subject: [PATCH 33/43] test --- tests/my_new_test.py | 108 ------------------------------------- tests/test_dpo_trainer.py | 70 +++++++++++------------- trl/trainer/dpo_trainer.py | 3 +- 3 files changed, 33 insertions(+), 148 deletions(-) delete mode 100644 tests/my_new_test.py diff --git a/tests/my_new_test.py b/tests/my_new_test.py deleted file mode 100644 index 5a6ae35d354..00000000000 --- a/tests/my_new_test.py +++ /dev/null @@ -1,108 +0,0 @@ -import datasets -import torch -from datasets import Dataset -from PIL import Image -from transformers import AutoModelForVision2Seq, AutoProcessor - -from trl import DPOConfig, DPOTrainer - - -# Get the model -model_id = "trl-internal-testing/tiny-random-idefics2" -model = AutoModelForVision2Seq.from_pretrained(model_id) -ref_model = AutoModelForVision2Seq.from_pretrained(model_id) -processor = AutoProcessor.from_pretrained(model_id) - -# Get the training args -training_args = DPOConfig( - output_dir=".", - per_device_train_batch_size=2, - max_steps=3, - remove_unused_columns=False, - gradient_accumulation_steps=1, - learning_rate=9e-1, - eval_strategy="steps", - beta=0.1, - loss_type="sigmoid", - precompute_ref_log_probs=True, -) - -dummy_dataset_dict = { - "images": [ - [Image.new("RGB", (100, 100), color="black")], - [Image.new("RGB", (133, 100), color="red")], - [Image.new("RGB", (100, 133), color="green")], - [Image.new("RGB", (133, 133), color="blue")], - [Image.new("RGB", (200, 50), color="yellow")], - [Image.new("RGB", (50, 200), color="magenta")], - [Image.new("RGB", (200, 200), color="cyan")], - # [Image.new("RGB", (50, 50), color="white")], - # [Image.new("RGB", (100, 100), color="orange")], - ], - "prompt": [ - " hello", - " how are you", - " What is your name?", - " What is your name?", - " Which is the best programming language?", - " Which is the best programming language?", - " Which is the best programming language?", - # "[INST] How is the stock price? [/INST]", - # "[INST] How is the stock price? [/INST] ", - ], - "chosen": [ - "hi nice to meet you", - "I am fine", - "My name is Mary", - "My name is Mary", - "Python", - "Python", - "Python", - # "$46 as of 10am EST", - # "46 as of 10am EST", - ], - "rejected": [ - "leave me alone", - "I am not fine", - "Whats it to you?", - "I dont have a name", - "Javascript", - "C++", - "Java", - # " $46 as of 10am EST", - # " 46 as of 10am EST", - ], -} - -features = datasets.Features( - { - "images": datasets.Sequence(datasets.Image(decode=True)), # datasets still handles badly sequence of images - "prompt": datasets.Value("string"), - "chosen": datasets.Value("string"), - "rejected": datasets.Value("string"), - } -) -dataset = Dataset.from_dict(dummy_dataset_dict, features=features) - - -trainer = DPOTrainer( - model=model, - ref_model=ref_model, - args=training_args, - tokenizer=processor, - train_dataset=dataset, - eval_dataset=dataset, -) - -previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()} - -trainer.train() - -assert trainer.state.log_history[-1]["train_loss"] is not None - -# check the params have changed -for n, param in previous_trainable_params.items(): - new_param = trainer.model.get_parameter(n) - # check the params have changed - ignore 0 biases - if param.sum() != 0: - assert not torch.allclose(param, new_param, rtol=1e-12, atol=1e-12) diff --git a/tests/test_dpo_trainer.py b/tests/test_dpo_trainer.py index ff742eb5832..12ede448c0d 100644 --- a/tests/test_dpo_trainer.py +++ b/tests/test_dpo_trainer.py @@ -97,48 +97,40 @@ def _init_dummy_image_dataset(self): # fmt: off dummy_dataset_dict = { "images": [ - [Image.new("RGB", (100, 100), color="black")], - [Image.new("RGB", (133, 100), color="red")], - [Image.new("RGB", (100, 133), color="green")], - [Image.new("RGB", (133, 133), color="blue")], - [Image.new("RGB", (200, 50), color="yellow")], - [Image.new("RGB", (50, 200), color="magenta")], - [Image.new("RGB", (200, 200), color="cyan")], - [Image.new("RGB", (50, 50), color="white")], - [Image.new("RGB", (100, 100), color="orange")], + [Image.new("RGB", (100, 50), color="black")], + # None, + # [Image.new("RGB", (100, 100), color="blue"), Image.new("RGB", (150, 50), color="red")], + [Image.new("RGB", (200, 100), color="green")], + # [Image.new("RGB", (150, 150), color="yellow"), Image.new("RGB", (50, 150), color="purple")], + [Image.new("RGB", (80, 120), color="gray")], + [Image.new("RGB", (120, 80), color="pink")], ], "prompt": [ - "hello", - "how are you", - "What is your name?", - "What is your name?", - "Which is the best programming language?", - "Which is the best programming language?", - "Which is the best programming language?", - "[INST] How is the stock price? [/INST]", - "[INST] How is the stock price? [/INST] ", + " Hello", + # "How are you?", + # " Let's chat", + " Good morning", + # " What's up?", + "Can you see this? ", + "Here is something interesting: ", ], "chosen": [ - "hi nice to meet you", - "I am fine", - "My name is Mary", - "My name is Mary", - "Python", - "Python", - "Python", - "$46 as of 10am EST", - "46 as of 10am EST", + "Hi nice to meet you!", + # "I'm doing well, thank you!", + # "Sure, let's talk!", + "Good morning to you too!", + # "Not much, just working.", + "Yes, I can see it clearly.", + "That's quite interesting indeed.", ], "rejected": [ - "leave me alone", - "I am not fine", - "Whats it to you?", - "I dont have a name", - "Javascript", - "C++", - "Java", - " $46 as of 10am EST", - " 46 as of 10am EST", + "Leave me alone!", + # "I'm not interested.", + # "I don't want to chat.", + "I'm still sleepy.", + # "Busy right now, talk later.", + "No, I can't see it.", + "I'm not sure what that is.", ], } # fmt: on @@ -248,8 +240,6 @@ def test_vdpo_trainer(self, loss_type, pre_compute): ref_model = self.idefics2_ref_model processor = self.idefics2_processor - processor.pad_token_id = processor.tokenizer.pad_token_id - trainer = DPOTrainer( model=model, ref_model=ref_model, @@ -931,3 +921,7 @@ def test_dpo_loss_js_div_f(self): policy_chosen_logps, policy_rejected_logps, reference_chosen_logps, reference_rejected_logps ) assert torch.isfinite(losses).cpu().numpy().all() + + +if __name__ == "__main__": + unittest.main() diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index d2fa213db0f..26161773dcf 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -746,7 +746,7 @@ def build_tokenized_answer(self, prompt, answer, images=None): answer_input_ids = full_tokenized["input_ids"][response_token_ids_start_idx:] answer_attention_mask = full_tokenized["attention_mask"][response_token_ids_start_idx:] - if self.is_vision_model: + if "pixel_values" in full_tokenized: return dict( prompt_input_ids=prompt_input_ids, prompt_attention_mask=prompt_attention_mask, @@ -761,7 +761,6 @@ def build_tokenized_answer(self, prompt, answer, images=None): prompt_attention_mask=prompt_attention_mask, input_ids=answer_input_ids, attention_mask=answer_attention_mask, - pixel_value=full_tokenized, ) def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module]] = None) -> Dict: From 5197d6debf923bfaf6222f6d2a3e8c9014c4d7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 08:33:47 +0000 Subject: [PATCH 34/43] comment and support more in process --- examples/scripts/dpo.py | 19 +++++++++++++++++-- examples/scripts/vdpo.py | 33 +++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 27ebb8a4df1..aede4ea609c 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -174,16 +174,31 @@ ds[key] = ds[key].select(range(50)) def process(row): - if "prompt" in row: + # The prompt can be either a string or a list. In some datasets, the prompt is just a common string + # for both rejected and chosen (already included in chosen and rejected) and is not meant to be used + # separately. In other datasets, the prompt is intended to be used as a prefix for rejected and chosen, + # and in such cases, it is properly formatted as a list with keys "role" and "content". + # Example 1: + # row = {"prompt": "What does detox mean?", + # "chosen": [{"content": "What does detox mean?", "role": "user"}, {"content": "It means to get rid of the toxins.", "role": "assistant"}], + # "rejected": [{"content": "What does detox mean?", "role": "assistant"}, {"content": "I don't know.", "role": "user"}]} + # Example 2: + # row = {"prompt": [{"content": "What does detox mean?", "role": "user"}], + # "chosen": [{"content": "It means to get rid of the toxins.", "role": "assistant"}], + # "rejected": [{"content": "I don't know.", "role": "user"}]} + if "prompt" in row and isinstance(row["prompt"], list): row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) + if "images" in row: - for idx, img in enumerate(row["images"]): # Resize image so that the largest side is 640 + for idx, img in enumerate(row["images"]): # Resize each image so the largest side is 640 pixels ratio = min(1.0, 640 / max(img.size)) new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) row["images"][idx] = img.resize(new_size) row["images"] = row["images"] + return row with PartialState().local_main_process_first(): diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index 2d3fa4d3eba..264fb727375 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -133,14 +133,31 @@ ds[key] = ds[key].select(range(50)) def process(row): - row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False) - row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) - row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) - for idx, img in enumerate(row["images"]): # Resize image so that the largest side is 640 - ratio = min(1.0, 640 / max(img.size)) - new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) - row["images"][idx] = img.resize(new_size) - row["images"] = row["images"] + # The prompt can be either a string or a list. In some datasets, the prompt is just a common string + # for both rejected and chosen (already included in chosen and rejected) and is not meant to be used + # separately. In other datasets, the prompt is intended to be used as a prefix for rejected and chosen, + # and in such cases, it is properly formatted as a list with keys "role" and "content". + # Example 1: + # row = {"prompt": "What does detox mean?", + # "chosen": [{"content": "What does detox mean?", "role": "user"}, {"content": "It means to get rid of the toxins.", "role": "assistant"}], + # "rejected": [{"content": "What does detox mean?", "role": "assistant"}, {"content": "I don't know.", "role": "user"}]} + # Example 2: + # row = {"prompt": [{"content": "What does detox mean?", "role": "user"}], + # "chosen": [{"content": "It means to get rid of the toxins.", "role": "assistant"}], + # "rejected": [{"content": "I don't know.", "role": "user"}]} + if "prompt" in row and isinstance(row["prompt"], list): + row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + + row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) + row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) + + if "images" in row: + for idx, img in enumerate(row["images"]): # Resize each image so the largest side is 640 pixels + ratio = min(1.0, 640 / max(img.size)) + new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) + row["images"][idx] = img.resize(new_size) + row["images"] = row["images"] + return row with PartialState().local_main_process_first(): From 45fda7e4d5eef14a2fd39ec2eafcc6fbe8e4f756 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 10:03:07 +0000 Subject: [PATCH 35/43] update process --- examples/scripts/dpo.py | 15 ++++++++++----- examples/scripts/vdpo.py | 6 +++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index aede4ea609c..8a91ee7d31d 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -181,16 +181,21 @@ def process(row): # Example 1: # row = {"prompt": "What does detox mean?", # "chosen": [{"content": "What does detox mean?", "role": "user"}, {"content": "It means to get rid of the toxins.", "role": "assistant"}], - # "rejected": [{"content": "What does detox mean?", "role": "assistant"}, {"content": "I don't know.", "role": "user"}]} + # "rejected": [{"content": "What does detox mean?", "role": "assistant"}, {"content": "I don't know.", "role": "assistant"}]} # Example 2: # row = {"prompt": [{"content": "What does detox mean?", "role": "user"}], # "chosen": [{"content": "It means to get rid of the toxins.", "role": "assistant"}], - # "rejected": [{"content": "I don't know.", "role": "user"}]} + # "rejected": [{"content": "I don't know.", "role": "assistant"}]} + if is_vision_model: + apply_chat_template = processor.apply_chat_template + else: + apply_chat_template = tokenizer.apply_chat_template + if "prompt" in row and isinstance(row["prompt"], list): - row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + row["prompt"] = apply_chat_template(row["prompt"], tokenize=False) - row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) - row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) + row["chosen"] = apply_chat_template(row["chosen"], tokenize=False) + row["rejected"] = apply_chat_template(row["rejected"], tokenize=False) if "images" in row: for idx, img in enumerate(row["images"]): # Resize each image so the largest side is 640 pixels diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index 264fb727375..bb3547c5882 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -146,10 +146,10 @@ def process(row): # "chosen": [{"content": "It means to get rid of the toxins.", "role": "assistant"}], # "rejected": [{"content": "I don't know.", "role": "user"}]} if "prompt" in row and isinstance(row["prompt"], list): - row["prompt"] = tokenizer.apply_chat_template(row["prompt"], tokenize=False) + row["prompt"] = processor.apply_chat_template(row["prompt"], tokenize=False) - row["chosen"] = tokenizer.apply_chat_template(row["chosen"], tokenize=False) - row["rejected"] = tokenizer.apply_chat_template(row["rejected"], tokenize=False) + row["chosen"] = processor.apply_chat_template(row["chosen"], tokenize=False) + row["rejected"] = processor.apply_chat_template(row["rejected"], tokenize=False) if "images" in row: for idx, img in enumerate(row["images"]): # Resize each image so the largest side is 640 pixels From 5a1dfa73f72430a9b45615bab7eebe9038e4134d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 10:03:42 +0000 Subject: [PATCH 36/43] update doc for vdpo --- docs/source/dpo_trainer.mdx | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/source/dpo_trainer.mdx b/docs/source/dpo_trainer.mdx index 4b95fa3552b..8da953a4bc4 100644 --- a/docs/source/dpo_trainer.mdx +++ b/docs/source/dpo_trainer.mdx @@ -70,8 +70,31 @@ dpo_dataset_dict = { where the `prompt` contains the context inputs, `chosen` contains the corresponding chosen responses and `rejected` contains the corresponding negative (rejected) responses. As can be seen a prompt can have multiple responses and this is reflected in the entries being repeated in the dictionary's value arrays. +`DPOTrainer` can be used to fine-tune visual language models (VLMs). In this case, the dataset must also contain the key `images`. For example, for Idefics2, the processor expects the dataset to have the following format: + +```py +dpo_dataset_dict = { + 'images': [ + [Image.open('beach.jpg')], + [Image.open('street.jpg')], + ], + 'prompt': [ + 'The image shows', + ' The image depicts', + ], + 'chosen': [ + 'a sunny beach with palm trees.', + 'a busy street with several cars and buildings.', + ], + 'rejected': [ + 'a snowy mountain with skiers.', + 'a calm countryside with green fields.', + ], +} +``` + ## Expected model format -The DPO trainer expects a model of `AutoModelForCausalLM`, compared to PPO that expects `AutoModelForCausalLMWithValueHead` for the value function. +The DPO trainer expects a model of `AutoModelForCausalLM` or `AutoModelForVision2Seq`, compared to PPO that expects `AutoModelForCausalLMWithValueHead` for the value function. ## Using the `DPOTrainer` @@ -86,7 +109,7 @@ dpo_trainer = DPOTrainer( model_ref, args=training_args, train_dataset=train_dataset, - tokenizer=tokenizer, + tokenizer=tokenizer, # for visual language models, use tokenizer=processor instead ) ``` After this one can then call: From 2c10ca870307d77bd99bbc4a5a0c712eab96eab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 10:11:26 +0000 Subject: [PATCH 37/43] caution about limited support --- docs/source/dpo_trainer.mdx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/dpo_trainer.mdx b/docs/source/dpo_trainer.mdx index 8da953a4bc4..614053316f6 100644 --- a/docs/source/dpo_trainer.mdx +++ b/docs/source/dpo_trainer.mdx @@ -72,6 +72,8 @@ where the `prompt` contains the context inputs, `chosen` contains the correspond `DPOTrainer` can be used to fine-tune visual language models (VLMs). In this case, the dataset must also contain the key `images`. For example, for Idefics2, the processor expects the dataset to have the following format: +Note: Currently, VLM support is exclusive to Idefics2 and does not extend to other VLMs. + ```py dpo_dataset_dict = { 'images': [ From 2e476334a6decefe1952a2baa740e65a8b38c759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com> Date: Wed, 26 Jun 2024 15:04:22 +0200 Subject: [PATCH 38/43] Update docs/source/dpo_trainer.mdx Co-authored-by: Kashif Rasul --- docs/source/dpo_trainer.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/dpo_trainer.mdx b/docs/source/dpo_trainer.mdx index 614053316f6..4173eed5a65 100644 --- a/docs/source/dpo_trainer.mdx +++ b/docs/source/dpo_trainer.mdx @@ -70,7 +70,7 @@ dpo_dataset_dict = { where the `prompt` contains the context inputs, `chosen` contains the corresponding chosen responses and `rejected` contains the corresponding negative (rejected) responses. As can be seen a prompt can have multiple responses and this is reflected in the entries being repeated in the dictionary's value arrays. -`DPOTrainer` can be used to fine-tune visual language models (VLMs). In this case, the dataset must also contain the key `images`. For example, for Idefics2, the processor expects the dataset to have the following format: +`DPOTrainer` can be used to fine-tune visual language models (VLMs). In this case, the dataset must also contain the key `images`, and the trainer's `tokenizer` is the VLM's `processor`. For example, for Idefics2, the processor expects the dataset to have the following format: Note: Currently, VLM support is exclusive to Idefics2 and does not extend to other VLMs. From f960a2a4603edde36c21f6133a01f347c52c2c6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 13:32:52 +0000 Subject: [PATCH 39/43] revert DPO example changes --- examples/scripts/dpo.py | 76 +++++++---------------------------------- 1 file changed, 13 insertions(+), 63 deletions(-) diff --git a/examples/scripts/dpo.py b/examples/scripts/dpo.py index 8a91ee7d31d..56a11d9dc3e 100644 --- a/examples/scripts/dpo.py +++ b/examples/scripts/dpo.py @@ -48,19 +48,6 @@ --use_peft \ --lora_r=16 \ --lora_alpha=16 - -# vision with peft: -accelerate launch examples/scripts/dpo.py \ - --dataset_name HuggingFaceH4/rlaif-v_formatted \ - --model_name_or_path HuggingFaceM4/idefics2-8b \ - --output_dir dpo_idefics_rlaif-v \ - --per_device_train_batch_size 1 \ - --gradient_accumulation_steps 16 \ - --learning_rate 1e-5 \ - --bf16 \ - --torch_dtype bfloat16 \ - --use_peft \ - --lora_target_modules=all-linear """ import logging @@ -71,7 +58,6 @@ TRL_USE_RICH = os.environ.get("TRL_USE_RICH", False) from trl.commands.cli_utils import DPOScriptArguments, init_zero_verbose, TrlParser -from accelerate import PartialState if TRL_USE_RICH: init_zero_verbose() @@ -82,7 +68,7 @@ import torch from datasets import load_dataset -from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForVision2Seq, AutoProcessor +from transformers import AutoModelForCausalLM, AutoTokenizer from trl import ( DPOConfig, @@ -126,25 +112,13 @@ device_map=get_kbit_device_map() if quantization_config is not None else None, quantization_config=quantization_config, ) - is_vision_model = model_config.model_name_or_path in ["HuggingFaceM4/idefics2-8b"] - if is_vision_model: - model = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) - else: - model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) + model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) peft_config = get_peft_config(model_config) if peft_config is None: - if is_vision_model: - model_ref = AutoModelForVision2Seq.from_pretrained(model_config.model_name_or_path, **model_kwargs) - else: - model_ref = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) + model_ref = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs) else: model_ref = None - if is_vision_model: - processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=True) - tokenizer = processor.tokenizer - else: - tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) - processor = AutoProcessor.from_pretrained(model_config.model_name_or_path, do_image_splitting=False) + tokenizer = AutoTokenizer.from_pretrained(model_config.model_name_or_path) if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token if tokenizer.chat_template is None: @@ -174,40 +148,16 @@ ds[key] = ds[key].select(range(50)) def process(row): - # The prompt can be either a string or a list. In some datasets, the prompt is just a common string - # for both rejected and chosen (already included in chosen and rejected) and is not meant to be used - # separately. In other datasets, the prompt is intended to be used as a prefix for rejected and chosen, - # and in such cases, it is properly formatted as a list with keys "role" and "content". - # Example 1: - # row = {"prompt": "What does detox mean?", - # "chosen": [{"content": "What does detox mean?", "role": "user"}, {"content": "It means to get rid of the toxins.", "role": "assistant"}], - # "rejected": [{"content": "What does detox mean?", "role": "assistant"}, {"content": "I don't know.", "role": "assistant"}]} - # Example 2: - # row = {"prompt": [{"content": "What does detox mean?", "role": "user"}], - # "chosen": [{"content": "It means to get rid of the toxins.", "role": "assistant"}], - # "rejected": [{"content": "I don't know.", "role": "assistant"}]} - if is_vision_model: - apply_chat_template = processor.apply_chat_template - else: - apply_chat_template = tokenizer.apply_chat_template - - if "prompt" in row and isinstance(row["prompt"], list): - row["prompt"] = apply_chat_template(row["prompt"], tokenize=False) - - row["chosen"] = apply_chat_template(row["chosen"], tokenize=False) - row["rejected"] = apply_chat_template(row["rejected"], tokenize=False) - - if "images" in row: - for idx, img in enumerate(row["images"]): # Resize each image so the largest side is 640 pixels - ratio = min(1.0, 640 / max(img.size)) - new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio)) - row["images"][idx] = img.resize(new_size) - row["images"] = row["images"] - + row["prompt"] = tokenizer.apply_chat_template(row["chosen"][:-1], tokenize=False) + row["chosen"] = tokenizer.apply_chat_template([row["chosen"][-1]], tokenize=False) + row["rejected"] = tokenizer.apply_chat_template([row["rejected"][-1]], tokenize=False) return row - with PartialState().local_main_process_first(): - ds = ds.map(process, num_proc=multiprocessing.cpu_count()) + ds = ds.map( + process, + num_proc=multiprocessing.cpu_count(), + load_from_cache_file=False, + ) train_dataset = ds[args.dataset_train_split] eval_dataset = ds[args.dataset_test_split] @@ -221,7 +171,7 @@ def process(row): args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, - tokenizer=processor if is_vision_model else tokenizer, + tokenizer=tokenizer, peft_config=get_peft_config(model_config), callbacks=[RichProgressCallback] if TRL_USE_RICH else None, ) From e4c743616a3eb188a886ffe387eea29a4d56bee8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 13:33:29 +0000 Subject: [PATCH 40/43] cleaner way to check if a model is vision --- trl/trainer/dpo_trainer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index 26161773dcf..c579e77743e 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -37,6 +37,7 @@ PreTrainedTokenizerBase, Trainer, ) +from transformers.models.auto.modeling_auto import MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES from transformers.trainer_callback import TrainerCallback from transformers.trainer_utils import EvalLoopOutput @@ -317,7 +318,7 @@ def make_inputs_require_grad(module, input, output): self.is_encoder_decoder = args.is_encoder_decoder if model is not None: - self.is_vision_model = model.config.model_type in ["idefics2"] # TODO: find a better way + self.is_vision_model = model.config.model_type in MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES.keys() else: warnings.warn( "No model provided, cannot determine if it is a vision model. Setting is_vision_model to False." From bfb35d347935cbb4a1801479d28fb9de6b39e656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 14:03:49 +0000 Subject: [PATCH 41/43] comment --- trl/trainer/dpo_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py index c579e77743e..b4775b165e8 100644 --- a/trl/trainer/dpo_trainer.py +++ b/trl/trainer/dpo_trainer.py @@ -510,7 +510,7 @@ def make_inputs_require_grad(module, input, output): # Compute that only on the main process for faster data processing. # see: https://github.com/huggingface/trl/pull/1255 with PartialState().local_main_process_first(): - # tokenize the dataset + # tokenize the dataset, lower writer batch size to avoid OOM (frequent in vision models) train_dataset = train_dataset.map(self.tokenize_row, num_proc=self.dataset_num_proc, writer_batch_size=10) if eval_dataset is not None: eval_dataset = eval_dataset.map( From 7b22153f2175b2a8b7ab515a47bf74bb02694d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 14:06:13 +0000 Subject: [PATCH 42/43] update vdpo example --- examples/scripts/vdpo.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/examples/scripts/vdpo.py b/examples/scripts/vdpo.py index bb3547c5882..b602c8f0098 100644 --- a/examples/scripts/vdpo.py +++ b/examples/scripts/vdpo.py @@ -18,22 +18,15 @@ --model_name_or_path HuggingFaceM4/idefics2-8b \ --per_device_train_batch_size 1 \ --gradient_accumulation_steps 16 \ - --learning_rate 1e-5 \ - --logging_steps 5 \ + --dataset_num_proc 32 \ --output_dir dpo_idefics_rlaif-v \ - --push_to_hub --hub_model_id HuggingFaceH4/idefics2-8b-dpo-rlaif-v \ --bf16 \ --torch_dtype bfloat16 \ - --logging_first_step \ - --no_remove_unused_columns \ - --dataset_num_proc 50 \ - --dataload_num_workers 16 \ --use_peft \ --lora_target_modules=all-linear """ import logging -import multiprocessing import os from contextlib import nullcontext @@ -106,8 +99,6 @@ tokenizer = processor.tokenizer if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token - if tokenizer.chat_template is None: - tokenizer.chat_template = "{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\n\n'}}{% endfor %}{{ eos_token }}" if args.ignore_bias_buffers: # torch distributed hack model._ddp_params_and_buffers_to_ignore = [ @@ -161,7 +152,7 @@ def process(row): return row with PartialState().local_main_process_first(): - ds = ds.map(process, num_proc=multiprocessing.cpu_count()) + ds = ds.map(process, num_proc=training_args.dataset_num_proc) train_dataset = ds[args.dataset_train_split] eval_dataset = ds[args.dataset_test_split] From 515519491e150ae3f6b030d3f269b18789c0b26f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Wed, 26 Jun 2024 14:06:34 +0000 Subject: [PATCH 43/43] rename --- examples/scripts/{vdpo.py => dpo_visual.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/scripts/{vdpo.py => dpo_visual.py} (100%) diff --git a/examples/scripts/vdpo.py b/examples/scripts/dpo_visual.py similarity index 100% rename from examples/scripts/vdpo.py rename to examples/scripts/dpo_visual.py