From 4d7e619db146f15f56c3aa5d594deccf109f02e2 Mon Sep 17 00:00:00 2001 From: Mohit Deopujari Date: Thu, 24 Oct 2024 15:35:29 -0700 Subject: [PATCH 1/3] fix for ValueError: Asking to pad but the tokenizer does not have a padding token. --- examples/language-modeling/README.md | 1 - examples/language-modeling/run_lora_clm.py | 10 +++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index 8ea0cdd554..f08cfecc0d 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -478,7 +478,6 @@ LOWER_LIST=ops_bf16.txt python ../gaudi_spawn.py \ --world_size 8 --use_mpi run_lora_clm.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --dataset_name tatsu-lab/alpaca \ - --bf16 True \ --output_dir ./model_lora_llama \ --num_train_epochs 3 \ --per_device_train_batch_size 16 \ diff --git a/examples/language-modeling/run_lora_clm.py b/examples/language-modeling/run_lora_clm.py index 1c6e29da25..4782ed58ae 100644 --- a/examples/language-modeling/run_lora_clm.py +++ b/examples/language-modeling/run_lora_clm.py @@ -700,6 +700,11 @@ def main(): raise ValueError("Must provide model_name_or_path to load a pretrained CausalLM model.") if model.config.model_type == "llama": + if model.generation_config.pad_token_id is None: + if isinstance(model.generation_config.eos_token_id, int): + model.generation_config.pad_token_id = model.generation_config.eos_token_id + elif isinstance(model.generation_config.eos_token_id, list): + model.generation_config.pad_token_id = model.generation_config.eos_token_id[0] if model_args.attn_softmax_bf16: model.generation_config.attn_softmax_bf16 = True if model_args.use_flash_attention: @@ -717,7 +722,10 @@ def main(): if hasattr(model.generation_config, "pad_token_id") and model.generation_config.pad_token_id is not None: tokenizer.pad_token_id = model.generation_config.pad_token_id if hasattr(model.generation_config, "eos_token_id") and model.generation_config.eos_token_id is not None: - tokenizer.eos_token_id = model.generation_config.eos_token_id + if isinstance(model.generation_config.eos_token_id, int): + tokenizer.eos_token_id = model.generation_config.eos_token_id + elif isinstance(model.generation_config.eos_token_id, list): + tokenizer.eos_token_id = model.generation_config.eos_token_id[0] if hasattr(model.generation_config, "bos_token_id") and model.generation_config.bos_token_id is not None: tokenizer.bos_token_id = model.generation_config.bos_token_id From 0a9b2f6b9cc684096fd271ed1a83db5018709589 Mon Sep 17 00:00:00 2001 From: Mohit Deopujari Date: Fri, 25 Oct 2024 16:45:10 -0700 Subject: [PATCH 2/3] Fix after review --- examples/language-modeling/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index f08cfecc0d..1cd73268d0 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -478,6 +478,7 @@ LOWER_LIST=ops_bf16.txt python ../gaudi_spawn.py \ --world_size 8 --use_mpi run_lora_clm.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --dataset_name tatsu-lab/alpaca \ + --bf16 True \ --output_dir ./model_lora_llama \ --num_train_epochs 3 \ --per_device_train_batch_size 16 \ From aade9ebb85b8312e7f5b4bbcc3dbf9cdc53e92bc Mon Sep 17 00:00:00 2001 From: Mohit Deopujari Date: Fri, 25 Oct 2024 16:47:15 -0700 Subject: [PATCH 3/3] Fixed space instead of tab --- examples/language-modeling/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index 1cd73268d0..8ea0cdd554 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -478,7 +478,7 @@ LOWER_LIST=ops_bf16.txt python ../gaudi_spawn.py \ --world_size 8 --use_mpi run_lora_clm.py \ --model_name_or_path meta-llama/Llama-2-7b-hf \ --dataset_name tatsu-lab/alpaca \ - --bf16 True \ + --bf16 True \ --output_dir ./model_lora_llama \ --num_train_epochs 3 \ --per_device_train_batch_size 16 \