Fix MistralIntegrationTest
#31231
@@ -526,7 +526,7 @@ def test_model_7b_logits(self):
         # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
         # considering differences in hardware processing and potential deviations in output.
         EXPECTED_SLICE = {
-            7: torch.tensor([-5.8781, -5.8616, -0.1052, -4.7200, -5.8781, -5.8774, -5.8773, -5.8777, -5.8781, -5.8780, -5.8781, -5.8779, -1.0787, 1.7583, -5.8779, -5.8780, -5.8783, -5.8778, -5.8776, -5.8781, -5.8784, -5.8778, -5.8778, -5.8777, -5.8779, -5.8778, -5.8776, -5.8780, -5.8779, -5.8781]),
+            7: torch.tensor([-5.8828, -5.8633, -0.1042, -4.7266, -5.8828, -5.8789, -5.8789, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -1.0801, 1.7598, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828, -5.8828]),
             8: torch.tensor([-5.8711, -5.8555, -0.1050, -4.7148, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -1.0781, 1.7568, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711, -5.8711]),
             9: torch.tensor([-5.8750, -5.8594, -0.1047, -4.7188, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -1.0781, 1.7578, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750, -5.8750]),
         }  # fmt: skip
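For context on how these keys are consumed: the dict is indexed by the CUDA compute capability major version reported by the driver, so each GPU class (7 covers V100/T4-class cards, 8 covers A100-class, 9 is the H100 slot, used here for MI300 per the comment above) gets its own reference values, since different hardware produces slightly different floating-point results. A minimal sketch of that dispatch, with an illustrative shortened dict rather than the test's full 30-element tensors:

import torch

# Illustrative, shortened stand-in for the EXPECTED_SLICE dict above:
# one reference tensor per CUDA compute capability major version.
EXPECTED_SLICE = {
    7: torch.tensor([-5.8828, -5.8633, -0.1042]),  # e.g. V100 / T4
    8: torch.tensor([-5.8711, -5.8555, -0.1050]),  # e.g. A100
    9: torch.tensor([-5.8750, -5.8594, -0.1047]),  # slot currently used for MI300
}

if torch.cuda.is_available():
    major, _minor = torch.cuda.get_device_capability()  # e.g. (8, 0) on A100
    expected = EXPECTED_SLICE[major]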
@@ -535,15 +535,11 @@ def test_model_7b_logits(self):
             out[0, 0, :30], EXPECTED_SLICE[self.cuda_compute_capability_major_version], atol=1e-4, rtol=1e-4
         )

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_bitsandbytes
     def test_model_7b_generation(self):
         EXPECTED_TEXT_COMPLETION = {
-            7: "My favourite condiment is 100% ketchup. I love it on everything. I'm not a big",
+            7: "My favourite condiment is 100% ketchup. I’m not a fan of mustard, mayo,",
Collaborator (Author): should have updated this in #29905 but forgot.
             8: "My favourite condiment is 100% ketchup. I’m not a fan of mustard, mayo,",
         }
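The assertion these values feed (visible in the first hunk above) compares a 30-element logits slice with both absolute and relative tolerance, which is why the references only need to be stable to roughly four decimal places per GPU class. A self-contained sketch of the same check, with made-up numbers:

import torch

# Made-up "model output" and hardware-specific reference: close but not equal.
out = torch.tensor([-5.8830, -5.8635, -0.1041, -4.7268])
ref = torch.tensor([-5.8828, -5.8633, -0.1042, -4.7266])

# Passes if |out - ref| <= atol + rtol * |ref| elementwise; raises otherwise.
torch.testing.assert_close(out, ref, atol=1e-4, rtol=1e-4)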
@@ -559,10 +555,6 @@ def test_model_7b_generation(self):
         text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
Comment on lines -562 to -565

Collaborator (Author): This does not help and, worse, it causes GPU OOM in some subsequent tests.

Contributor: Happy to have this deleted, but I'm very confused about why it would cause OOM 😭

Collaborator (Author): I have to say I am confused too, but I was not expecting it to have an undesired side effect like this (even if it is not helpful). I don't check if

Collaborator (Author): Out of curiosity, and to keep the info here for the record: it's very mysterious to me.
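One unverified observation for anyone digging into the mystery: the deleted lines call backend_empty_cache before gc.collect(), while the usual idiom runs garbage collection first, since the CUDA caching allocator can only return blocks once Python has actually released the tensors holding them. Whether that ordering explains the OOM is pure speculation; a sketch of the conventional order:

import gc

import torch

model = torch.nn.Linear(8, 8).cuda()  # stand-in for the test's real model

# ... test body would run here ...

del model                 # drop the last Python reference to the model
gc.collect()              # break any reference cycles so the storages are freed
torch.cuda.empty_cache()  # only now can the cached blocks be returned to the driver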
     @require_bitsandbytes
     @slow
     @require_flash_attn
@@ -587,11 +579,6 @@ def test_model_7b_long_prompt(self):
         generated_ids = model.generate(input_ids, max_new_tokens=4, temperature=0)
         self.assertEqual(EXPECTED_OUTPUT_TOKEN_IDS, generated_ids[0][-2:].tolist())

-        del assistant_model
-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_torch_sdpa
     def test_model_7b_long_prompt_sdpa(self):
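For reference, the decorators above gate this test on bitsandbytes and FlashAttention-2. A hedged sketch of the loading pattern such a test typically uses (the checkpoint and prompt length here are illustrative assumptions, not the test's actual pins):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "mistralai/Mistral-7B-v0.1"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # needs bitsandbytes
    attn_implementation="flash_attention_2",                    # needs flash-attn
    torch_dtype=torch.float16,
)

# Greedy decoding on a long prompt; the test checks only the final token ids.
input_ids = tokenizer("hello " * 2000, return_tensors="pt").input_ids.to(model.device)
generated_ids = model.generate(input_ids, max_new_tokens=4)
print(generated_ids[0][-2:].tolist())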
@@ -635,7 +622,7 @@ def test_speculative_generation(self):
         # Note: Key 9 is currently set for MI300, but may need potential future adjustments for H100s,
         # considering differences in hardware processing and potential deviations in generated text.
         EXPECTED_TEXT_COMPLETION = {
-            7: "My favourite condiment is 100% Sriracha. I love the heat, the tang and the fact costs",
+            7: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
Collaborator (Author): see PR description
             8: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
             9: "My favourite condiment is 100% ketchup. I love it on everything. I’m not a big",
         }
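On the test itself: assisted ("speculative") generation lets a draft model propose tokens that the main model then verifies, so with greedy decoding the output should match plain generation exactly — which is what makes an exact string comparison viable here. A minimal sketch of the API, with illustrative checkpoints rather than the test's pinned ones:

from transformers import AutoModelForCausalLM, AutoTokenizer

main_id = "mistralai/Mistral-7B-v0.1"   # illustrative
draft_id = "mistralai/Mistral-7B-v0.1"  # a smaller draft model would normally go here

tokenizer = AutoTokenizer.from_pretrained(main_id)
model = AutoModelForCausalLM.from_pretrained(main_id, device_map="auto")
assistant = AutoModelForCausalLM.from_pretrained(draft_id, device_map="auto")

inputs = tokenizer("My favourite condiment is", return_tensors="pt").to(model.device)
generated_ids = model.generate(
    **inputs, assistant_model=assistant, max_new_tokens=20, do_sample=False
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))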
@@ -654,10 +641,6 @@ def test_speculative_generation(self):
         text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_read_token
     def test_compile_static_cache(self):
@@ -726,10 +709,6 @@ def test_compile_static_cache(self):
         static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION[self.cuda_compute_capability_major_version], static_compiled_text)

-        del model
-        backend_empty_cache(torch_device)
-        gc.collect()
-
     @slow
     @require_torch_gpu
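And for test_compile_static_cache: a static KV cache pre-allocates fixed-shape tensors, removing the dynamic shapes that would otherwise force torch.compile to recompile at every generation step. A hedged sketch of that pattern (checkpoint and settings are assumptions, not what the test pins):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-v0.1"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

# Compile the forward pass; the static cache keeps tensor shapes fixed across steps.
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

inputs = tokenizer("My favourite condiment is", return_tensors="pt").to(model.device)
generated_ids = model.generate(**inputs, max_new_tokens=20, cache_implementation="static")
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))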