diff --git a/requirements.txt b/requirements.txt index d34f468a1a..d2ae236219 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ packaging==23.2 huggingface_hub>=1.1.7 peft>=0.18.0 tokenizers>=0.22.1 -transformers @ git+https://github.com/huggingface/transformers.git@main +transformers @ git+https://github.com/huggingface/transformers.git@v5.0.0rc2 accelerate==1.12.0 datasets==4.4.2 deepspeed>=0.18.3 diff --git a/setup.py b/setup.py index 10c9a84539..1257f35331 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ def parse_requirements(extras_require_map): if (major, minor) >= (2, 9): extras_require_map.pop("fbgemm-gpu") - extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.4.1"] + extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.4.2"] extras_require_map["vllm"] = ["vllm==0.11.1"] elif (major, minor) >= (2, 8): extras_require_map.pop("fbgemm-gpu") diff --git a/tests/conftest.py b/tests/conftest.py index 4c8c80cb7f..b542d377ba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -83,6 +83,12 @@ def download_smollm2_135m_model(): snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M", repo_type="model") +@pytest.fixture(scope="session", autouse=True) +def download_smollm2_135m_instruct_model(): + # download the model + snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M-Instruct", repo_type="model") + + @pytest.fixture(scope="session", autouse=True) def download_smollm2_135m_gptq_model(): # download the model @@ -143,12 +149,20 @@ def download_argilla_distilabel_intel_orca_dpo_dataset(): ) -# @pytest.fixture(scope="session", autouse=True) -# def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset(): -# # download the dataset -# snapshot_download_w_retry( -# "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset" -# ) +@pytest.fixture(scope="session", autouse=True) +def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset(): + # download the dataset + snapshot_download_w_retry( + "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset" + ) + + +@pytest.fixture(scope="session", autouse=True) +def download_argilla_ultrafeedback_binarized_preferences_cleaned_kto_dataset(): + # download the dataset + snapshot_download_w_retry( + "argilla/ultrafeedback-binarized-preferences-cleaned-kto", repo_type="dataset" + ) # @pytest.fixture(scope="session", autouse=True) @@ -251,7 +265,9 @@ def download_llama_1b_model_fixture(): def download_llama3_8b_model_fixture(): # download the tokenizer only snapshot_download_w_retry( - "NousResearch/Meta-Llama-3-8B", repo_type="model", allow_patterns=["*token*"] + "NousResearch/Meta-Llama-3-8B", + repo_type="model", + allow_patterns=["*token*", "config.json"], ) @@ -261,7 +277,7 @@ def download_llama3_8b_instruct_model_fixture(): snapshot_download_w_retry( "NousResearch/Meta-Llama-3-8B-Instruct", repo_type="model", - allow_patterns=["*token*"], + allow_patterns=["*token*", "config.json"], ) @@ -269,7 +285,19 @@ def download_llama3_8b_instruct_model_fixture(): def download_phi_35_mini_model_fixture(): # download the tokenizer only snapshot_download_w_retry( - "microsoft/Phi-3.5-mini-instruct", repo_type="model", allow_patterns=["*token*"] + "microsoft/Phi-3.5-mini-instruct", + repo_type="model", + allow_patterns=["*token*", "config.json"], + ) + + +@pytest.fixture(scope="session", autouse=True) +def download_phi_4_reasoning_model_fixture(): + # download the tokenizer only + snapshot_download_w_retry( + "microsoft/Phi-4-reasoning", + repo_type="model", + allow_patterns=["*token*", "config.json"], ) @@ -279,7 +307,7 @@ def download_phi_3_medium_model_fixture(): snapshot_download_w_retry( "microsoft/Phi-3-medium-128k-instruct", repo_type="model", - allow_patterns=["*token*"], + allow_patterns=["*token*", "config.json"], ) @@ -562,6 +590,8 @@ def test_load_fixtures( download_mhenrichsen_alpaca_2k_dataset, download_mhenrichsen_alpaca_2k_w_revision_dataset, download_mlabonne_finetome_100k_dataset, + download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset, + download_argilla_ultrafeedback_binarized_preferences_cleaned_kto_dataset, download_argilla_distilabel_capybara_dpo_7k_binarized_dataset, download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset, download_argilla_dpo_pairs_dataset, @@ -573,6 +603,7 @@ def test_load_fixtures( download_llama3_8b_instruct_model_fixture, download_phi_35_mini_model_fixture, download_phi_3_medium_model_fixture, + download_phi_4_reasoning_model_fixture, download_mistral_7b_model_fixture, download_gemma_2b_model_fixture, download_gemma2_9b_model_fixture, diff --git a/tests/e2e/multigpu/test_gemma3.py b/tests/e2e/multigpu/test_gemma3.py index 3c77c7107e..4b74fcbb04 100644 --- a/tests/e2e/multigpu/test_gemma3.py +++ b/tests/e2e/multigpu/test_gemma3.py @@ -28,13 +28,12 @@ class TestMultiGPUGemma3: Test case for Gemma3 models using LoRA """ - @pytest.mark.skip( - reason="broken in transformers v5 due to embeddings bug fixed in https://github.com/huggingface/transformers/pull/42558" - ) def test_lora_ddp_packed(self, temp_dir): cfg = DictDefault( { "base_model": "axolotl-mirrors/gemma-3-4b-pt", + "model_type": "Gemma3ForCausalLM", + "cls_model_config": "Gemma3TextConfig", "sequence_len": 2048, "ddp_find_unused_parameters": True, "sample_packing": True, diff --git a/tests/prompt_strategies/test_chat_templates.py b/tests/prompt_strategies/test_chat_templates.py index 911a97922c..90e0e274b7 100644 --- a/tests/prompt_strategies/test_chat_templates.py +++ b/tests/prompt_strategies/test_chat_templates.py @@ -140,13 +140,13 @@ def test_phi35(self, phi35_tokenizer, assistant_dataset): # fmt: off expected_input_ids = [ 32010, # user - 12199, 32007, # user eot + 22172, 32007, # user eot 32001, # assistant - 12199, 32007, # assistant eot + 22172, 32007, # assistant eot 32010, # user - 16773, 26966, 32007, # user eot + 1781, 26966, 32007, # user eot 32001, # assistant - 16773, 26966, 32007, # assistant eot + 1781, 26966, 32007, # assistant eot ] expected_labels = [ -100, # user @@ -156,7 +156,7 @@ def test_phi35(self, phi35_tokenizer, assistant_dataset): -100, # user -100, -100, -100, # user eot -100, # assistant - 16773, 26966, 32007, # assistant eot + 1781, 26966, 32007, # assistant eot ] # fmt: on LOG.debug(f"Expected input_ids: {expected_input_ids}") diff --git a/tests/test_tokenizers.py b/tests/test_tokenizers.py index f308efbef3..114c2bea2d 100644 --- a/tests/test_tokenizers.py +++ b/tests/test_tokenizers.py @@ -84,7 +84,7 @@ def test_add_additional_special_tokens(self): } ) tokenizer = load_tokenizer(cfg) - assert tokenizer("<|im_start|>user")["input_ids"] == [1, 32000, 1792] + assert tokenizer("<|im_start|>user")["input_ids"] == [1, 32000, 1404] assert len(tokenizer) == 32001 # ensure reloading the tokenizer again from cfg results in same vocab length