diff --git a/examples/question-answering/README.md b/examples/question-answering/README.md index 677145387f..32d4917a82 100644 --- a/examples/question-answering/README.md +++ b/examples/question-answering/README.md @@ -133,6 +133,36 @@ Here is a DeepSpeed configuration you can use to train your models on Gaudi: ``` +## Fine-tuning Llama on SQuAD1.1 + +> [!NOTE] +> Llama/Llama2 for question answering requires Transformers v4.38.0 or newer, which supports the `LlamaForQuestionAnswering` class. + +Here is a command you can run to train a Llama model for question answering: +```bash +python ../gaudi_spawn.py \ + --world_size 8 --use_deepspeed run_qa.py \ + --model_name_or_path FlagAlpha/Llama2-Chinese-13b-Chat \ + --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ + --dataset_name squad \ + --do_train \ + --do_eval \ + --per_device_train_batch_size 8 \ + --per_device_eval_batch_size 8 \ + --learning_rate 3e-5 \ + --num_train_epochs 2 \ + --max_seq_length 384 \ + --doc_stride 128 \ + --output_dir /tmp/squad_output/ \ + --use_habana \ + --use_lazy_mode \ + --use_hpu_graphs_for_inference \ + --throughput_warmup_steps 3 \ + --max_train_samples 45080 \ + --deepspeed ../../tests/configs/deepspeed_zero_2.json +``` + + ## Inference To run only inference, you can start from the commands above and you just have to remove the training-only arguments such as `--do_train`, `--per_device_train_batch_size`, `--num_train_epochs`, etc... @@ -168,6 +198,8 @@ python run_qa.py \ | ALBERT XXL (single-card) | 5e-6 | 2 | 12 | 2 | | ALBERT XXL (multi-card) | 5e-5 | 2 | 12 | 2 | | DistilBERT | 5e-5 | 3 | 8 | 8 | +| meta-llama/Llama-2-13b-chat-hf (multi-card) | 3e-5 | 2 | 8 | 8 | +| FlagAlpha/Llama2-Chinese-13b-Chat (multi-card) | 3e-5 | 2 | 8 | 8 | ## Fine-tuning T5 on SQuAD2.0 diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py index a72cc68aec..e95e014f92 100644 --- a/examples/question-answering/run_qa.py +++ b/examples/question-answering/run_qa.py @@ -377,6 +377,10 @@ def main(): token=model_args.token, trust_remote_code=model_args.trust_remote_code, ) + if config.model_type == "llama": + if tokenizer.pad_token is None: + tokenizer.add_special_tokens({"pad_token": "[PAD]"}) + tokenizer.cls_token = tokenizer.bos_token model = AutoModelForQuestionAnswering.from_pretrained( model_args.model_name_or_path, from_tf=bool(".ckpt" in model_args.model_name_or_path),