From 63ff31c7ea5cbf0896f5ef50d81ee8f5226320b5 Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Tue, 10 Dec 2024 18:46:57 +0000 Subject: [PATCH 1/2] adjust bert and roberta targets --- tests/baselines/bert_large_uncased_whole_word_masking.json | 6 +++--- tests/baselines/roberta_large.json | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/baselines/bert_large_uncased_whole_word_masking.json b/tests/baselines/bert_large_uncased_whole_word_masking.json index 37948b9746..605e719faf 100755 --- a/tests/baselines/bert_large_uncased_whole_word_masking.json +++ b/tests/baselines/bert_large_uncased_whole_word_masking.json @@ -77,7 +77,7 @@ "learning_rate": 3e-5, "train_batch_size": 32, "eval_f1": 91.71, - "train_runtime": 77.307, + "train_runtime": 80.307, "train_samples_per_second": 2150.333, "extra_arguments": [ "--max_seq_length 384", @@ -95,7 +95,7 @@ "train_batch_size": 256, "eval_f1": 0.867, "train_runtime": 33.2909, - "train_samples_per_second": 1151.598, + "train_samples_per_second": 1100.598, "extra_arguments": [ "--max_seq_length 128", "--use_hpu_graphs_for_inference" @@ -115,4 +115,4 @@ } } } -} \ No newline at end of file +} diff --git a/tests/baselines/roberta_large.json b/tests/baselines/roberta_large.json index 8b9037b32b..4d7233e089 100755 --- a/tests/baselines/roberta_large.json +++ b/tests/baselines/roberta_large.json @@ -67,7 +67,7 @@ "learning_rate": 7e-5, "train_batch_size": 32, "eval_f1": 94.09, - "train_runtime": 77.333, + "train_runtime": 79.333, "train_samples_per_second": 2138.366, "extra_arguments": [ "--max_seq_length 384", @@ -95,4 +95,4 @@ } } } -} \ No newline at end of file +} From 36b68c8c0f491da8f8fa552229284747a535a35d Mon Sep 17 00:00:00 2001 From: Harish Subramony Date: Tue, 10 Dec 2024 19:09:21 +0000 Subject: [PATCH 2/2] adjust DeciLM baseline --- tests/test_text_generation_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index c17d6cbdb7..610d846b59 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -44,7 +44,7 @@ ("google/gemma-7b", 1, False, 109.70751574382221, True), ("google/gemma-2-9b", 1, False, 92.302359446567, True), ("state-spaces/mamba-130m-hf", 1536, False, 5385.511100161605, False), - ("Deci/DeciLM-7B", 1, False, 120, False), + ("Deci/DeciLM-7B", 1, False, 115, False), ("Qwen/Qwen2-7B", 256, False, 8870.945160540245, True), ("Qwen/Qwen1.5-MoE-A2.7B", 1, True, 44.25834541569395, False), ("EleutherAI/gpt-neo-2.7B", 1, False, 257.2476416844122, False),