diff --git a/tests/baselines/Llama_3_1_8B.json b/tests/baselines/Llama_3_1_8B.json new file mode 100644 index 0000000000..fa7b39c095 --- /dev/null +++ b/tests/baselines/Llama_3_1_8B.json @@ -0,0 +1,35 @@ +{ + "gaudi2": { + "tatsu-lab/alpaca": { + "num_train_epochs": 2, + "eval_batch_size": 1, + "distribution": { + "single_card": { + "learning_rate": 3e-4, + "train_batch_size": 10, + "perplexity": 2.7317, + "train_runtime": 1435.24322, + "train_samples_per_second": 13.3044, + "extra_arguments": [ + "--bf16", + "--gradient_checkpointing", + "--eval_strategy epoch", + "--eval_delay 2", + "--save_strategy no", + "--warmup_ratio 0.03", + "--lr_scheduler_type cosine", + "--logging_steps 1", + "--lora_rank 4", + "--lora_target_modules q_proj v_proj", + "--dataset_concatenation", + "--max_seq_length 512", + "--validation_split_percentage 10", + "--attn_softmax_bf16", + "--use_flash_attention True", + "--flash_attention_causal_mask True" + ] + } + } + } + } +} \ No newline at end of file diff --git a/tests/baselines/Meta_Llama_3_1_8B.json b/tests/baselines/Meta_Llama_3_1_8B.json new file mode 100644 index 0000000000..fa7b39c095 --- /dev/null +++ b/tests/baselines/Meta_Llama_3_1_8B.json @@ -0,0 +1,35 @@ +{ + "gaudi2": { + "tatsu-lab/alpaca": { + "num_train_epochs": 2, + "eval_batch_size": 1, + "distribution": { + "single_card": { + "learning_rate": 3e-4, + "train_batch_size": 10, + "perplexity": 2.7317, + "train_runtime": 1435.24322, + "train_samples_per_second": 13.3044, + "extra_arguments": [ + "--bf16", + "--gradient_checkpointing", + "--eval_strategy epoch", + "--eval_delay 2", + "--save_strategy no", + "--warmup_ratio 0.03", + "--lr_scheduler_type cosine", + "--logging_steps 1", + "--lora_rank 4", + "--lora_target_modules q_proj v_proj", + "--dataset_concatenation", + "--max_seq_length 512", + "--validation_split_percentage 10", + "--attn_softmax_bf16", + "--use_flash_attention True", + "--flash_attention_causal_mask True" + ] + } + } + } + } +} \ No newline at end of file diff --git a/tests/test_examples.py b/tests/test_examples.py index c5668e5b7c..db36747b25 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -262,6 +262,11 @@ def to_test( return False elif "llama" in model_name and "trl-sft-qwen" in task_name: return False + elif "Llama-3.1-8B" in model_name: + if multi_card: + return False + elif task_name == "tatsu-lab/alpaca": + return True elif "falcon" in model_name and task_name in ( "llama-adapter", "databricks/databricks-dolly-15k", @@ -803,7 +808,7 @@ class ProteinFoldingExampleTester2(ExampleTesterBase, metaclass=ExampleTestMeta, class CausalLanguageModelingLORAExampleTester( ExampleTesterBase, metaclass=ExampleTestMeta, example_name="run_lora_clm" ): - TASK_NAME = "databricks/databricks-dolly-15k" + TASK_NAME = ["tatsu-lab/alpaca", "databricks/databricks-dolly-15k"] class MultiCardCausalLanguageModelingLORAExampleTester2( diff --git a/tests/utils.py b/tests/utils.py index 7eab1b06be..795a090519 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -53,7 +53,7 @@ "clip": [("./clip-roberta", "Habana/clip")], "bridgetower": [("BridgeTower/bridgetower-large-itm-mlm-itc", "Habana/clip")], "gpt_neox": [("EleutherAI/gpt-neox-20b", "Habana/gpt2")], - "llama": [("huggyllama/llama-7b", "Habana/llama")], + "llama": [("huggyllama/llama-7b", "Habana/llama"), ("meta-llama/Llama-3.1-8B", "Habana/llama")], "falcon": [("tiiuae/falcon-40b", "Habana/falcon")], "bloom": [("bigscience/bloom-7b1", "Habana/roberta-base")], "whisper": [("openai/whisper-small", "Habana/whisper")],