Fix FalconMambaIntegrationTests #38566
Changes from all commits
@@ -19,9 +19,12 @@
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, FalconMambaConfig, is_torch_available
from transformers.testing_utils import (
    Expectations,
    cleanup,
    require_bitsandbytes,
    require_torch,
    require_torch_accelerator,
    require_torch_large_accelerator,
    require_torch_multi_accelerator,
    require_torch_multi_gpu,
    slow,
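For reference, the `Expectations` helper imported here maps a `(device type, compute capability major version)` key to a reference value, and `get_expectation()` returns the entry matching the accelerator the test is running on. A minimal sketch of the pattern the updated tests rely on (the strings are placeholders, not real FalconMamba outputs; it assumes the current device matches one of the keys):

```python
from transformers.testing_utils import Expectations

# Hardware-dependent reference outputs, keyed by (device type, compute capability major).
# The strings below are placeholders used only to illustrate the lookup.
EXPECTED_OUTPUTS = Expectations(
    {
        ("cuda", 7): "reference text observed on capability-7 GPUs (e.g. T4)",
        ("cuda", 8): "reference text observed on capability-8 GPUs (e.g. A100)",
    }
)

# Picks the entry for the device the test currently runs on, so a single
# assert can cover CI machines with different GPUs.
EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
```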
@@ -450,15 +453,30 @@ def setUp(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
        self.text = "Hello today"

    def test_generation_bf16(self):
        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16, device_map="auto")
        cleanup(torch_device, gc_collect=True)

    def tearDown(self):
        cleanup(torch_device, gc_collect=True)

    # On T4, get `NotImplementedError: Cannot copy out of meta tensor; no data!`
    @require_torch_large_accelerator
    def test_generation_fp16(self):
        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16, device_map="auto")
        inputs = self.tokenizer(self.text, return_tensors="pt").to(torch_device)
        out = model.generate(**inputs, max_new_tokens=20, do_sample=False)

+        EXPECTED_OUTPUTS = Expectations(
+            {
+                ("cuda", 7): "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep",
+                ("cuda", 8): 'Hello today Iava,\n\nI am writing to you today to discuss the importance of maintaining a healthy lifestyle',
+            }
+        )  # fmt: skip
+        EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()

        self.assertEqual(
            self.tokenizer.batch_decode(out, skip_special_tokens=False)[0],
-            "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep",
+            EXPECTED_OUTPUT,
        )
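The `tearDown` added above calls `cleanup(torch_device, gc_collect=True)` between tests. Roughly speaking, this drops Python references and returns cached accelerator memory; a hand-rolled approximation (a sketch, not the library's actual implementation) would be:

```python
import gc

import torch


def approximate_cleanup(gc_collect: bool = True) -> None:
    # Collect garbage first so dangling tensors are actually freed, then
    # release the CUDA caching allocator's unused blocks back to the driver.
    if gc_collect:
        gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```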
    @require_bitsandbytes

@@ -471,19 +489,19 @@ def test_generation_4bit(self):

        self.assertEqual(
            self.tokenizer.batch_decode(out, skip_special_tokens=False)[0],
-            """Hello today I'm going to talk about the "C" in the "C-I-""",
+            "Hello today Iava,\n\nI'm sorry to hear that you're having trouble with the ",
        )
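`test_generation_4bit` (whose body is mostly outside this hunk) presumably loads the model in 4-bit via bitsandbytes, hence the `@require_bitsandbytes` marker and the `BitsAndBytesConfig` import. A rough sketch of such a 4-bit load (the exact quantization arguments used by the test are not shown in the diff, so this is illustrative only):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "tiiuae/falcon-mamba-7b"

# Illustrative 4-bit quantized load via bitsandbytes.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
)

tok = AutoTokenizer.from_pretrained(model_id)
inputs = tok("Hello today", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=20, do_sample=False)
print(tok.batch_decode(out, skip_special_tokens=False)[0])
```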
    def test_generation_torch_compile(self):
-        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.bfloat16).to(torch_device)
+        model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16).to(torch_device)
        model = torch.compile(model)

        inputs = self.tokenizer(self.text, return_tensors="pt").to(torch_device)
        out = model.generate(**inputs, max_new_tokens=20, do_sample=False)

        self.assertEqual(
            self.tokenizer.batch_decode(out, skip_special_tokens=False)[0],
-            "Hello today I am going to show you how to make a simple and easy to make paper plane.\nStep",
+            "Hello today Iava,\n\nI am writing to you today to discuss the importance of maintaining a healthy lifestyle",
        )

    def test_batched_generation(self):
@@ -493,13 +511,22 @@ def test_batched_generation(self):

        texts = ["Hello today", "Hello my name is Younes and today"]

-        EXPECTED_OUTPUT = [
-            "Hello today I'm going to show you how to make a 3D model of a house.\n",
-            "Hello my name is Younes and today I will be talking about the topic of “The importance of the internet in our life”.\n",
-        ]
+        EXPECTED_OUTPUTS = Expectations(
+            {
+                ("cuda", 7): [
+                    'Hello today I will be talking about the “Theory of Relativity” by Albert Einstein.\nThe',
+                    'Hello my name is Younes and today I will be talking about the importance of the internet in our lives.\nThe internet is a global',
+                ],
+                ("cuda", 8): [
+                    'Hello today I am going to talk about the “Theory of Relativity” by Albert Einstein.\n',
+                    'Hello my name is Younes and today I will be talking about the importance of the internet in our lives.\nThe internet is a global',
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()

        inputs = tok(texts, return_tensors="pt", padding=True, return_token_type_ids=False).to(torch_device)
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.bfloat16)
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.float16)

        out = model.generate(**inputs, max_new_tokens=20)
        out = tok.batch_decode(out, skip_special_tokens=True)
@@ -514,14 +541,27 @@ def test_batched_generation(self):
        out = model.generate(**inputs, max_new_tokens=20)
        out = tok.batch_decode(out, skip_special_tokens=True)

+        EXPECTED_OUTPUTS = Expectations(
+            {
+                ("cuda", 7): [
+                    ' I will be talking about the “Theory of Relativity” by Albert Einstein.\nThe',
+                    ' I will be talking about the importance of the internet in our lives.\nThe internet is a global',
+                ],
+                ("cuda", 8): [
+                    ' I am going to talk about the “Theory of Relativity” by Albert Einstein.\n',
+                    ' I will be talking about the importance of the internet in our lives.\nThe internet is a global',
Comment on lines +546 to +553

Collaborator (Author):
@gante I guess it's normal that, with `inputs_embeds`, the returned output only contains the newly generated part (no prompt)?

Contributor:
Imo, that doesn't seem correct to me. It would be weird to expect different behaviour here, since we generate from the same "prompt". That might be a regression somewhere.

Collaborator (Author):
I am not an expert (that is why I ping @gante), but I kind of think it's normal. For the prompt part, we only pass embeddings, not the token ids. And for that part, we can't recover the token ids from the embeddings. That is why it only gives the part that is newly generated.

Contributor:
It would be nice to know if the original commit worked on this test. Or at least whether, with input embeds, the same issue persisted (no prompt). Both behaviours are justified imo.

Collaborator (Author):
It's failing from when the test was written 😢 (That's why I always say it's important to use …)

Collaborator (Author):
You are right, my bad, I moved too fast and got the wrong results. It's on … I will check the commit from the day before to see what happened.

Collaborator (Author):
I confirmed that this test was failing when it was added on 2025/06/19 (I checked out that commit and ran the test). Around that time, several CIs were triggered manually on different commits, so it's hard to verify on the Slack channels.

Contributor:
Sorry to be so picky, but is it also failing on (at least) no prefix being returned 👀 If yes, I think this is fine to merge.

Collaborator (Author):
Yes, same failing reason. Not picky, it's fine. I am also happy to wait for Joao's response. No urgency to merge at all :-)

Collaborator (Author):
Gently ping @gante
+                ],
+            }
+        )  # fmt: skip
+        EXPECTED_OUTPUT = EXPECTED_OUTPUTS.get_expectation()
        self.assertListEqual(out, EXPECTED_OUTPUT)
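For context on the thread above: when `generate` receives `inputs_embeds` instead of `input_ids`, it has no prompt token ids to prepend to the returned sequence, so the decoded text contains only the newly generated part, which is why the expected strings in this block start mid-sentence. A small sketch of that behaviour (not part of the PR; the settings only loosely mirror the test):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "tiiuae/falcon-mamba-7b"  # any causal LM behaves the same way here
tok = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map=0, torch_dtype=torch.float16)

inputs = tok("Hello today", return_tensors="pt").to(model.device)

# Generating from token ids: the prompt ids are part of the returned sequence,
# so the decoded text starts with "Hello today ...".
out_from_ids = model.generate(**inputs, max_new_tokens=20)

# Generating from embeddings only: there are no prompt ids to prepend, so the
# returned sequence (and the decoded text) contains just the continuation.
embeds = model.get_input_embeddings()(inputs["input_ids"])
out_from_embeds = model.generate(
    inputs_embeds=embeds, attention_mask=inputs["attention_mask"], max_new_tokens=20
)

print(tok.batch_decode(out_from_ids, skip_special_tokens=True)[0])     # prompt + new text
print(tok.batch_decode(out_from_embeds, skip_special_tokens=True)[0])  # new text only
```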

    @require_torch_multi_accelerator
    def test_training_kernel(self):
        model_id = "tiiuae/falcon-mamba-7b"

        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.bfloat16)
+        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16)
        tokenizer.pad_token_id = tokenizer.eos_token_id

        text = "Hello today"
Might be too little memory or some CPU offloading issues?

Yeah, I guess so too. But if I remove `device_map="auto"` and add `.to(torch_device)`, the generation can run without OOM. So it's a bit strange to me why `auto` would cause a problem.

Maybe someone to cc here? It's not a deal breaker, but it might be interesting to investigate if someone wants to / has the time.

I will open an issue and ping …
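A minimal sketch of the workaround described above: skip `device_map="auto"` and move the whole model to a single device explicitly (illustrative only; the test itself keeps `device_map="auto"`):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.testing_utils import torch_device

model_id = "tiiuae/falcon-mamba-7b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Variant discussed above that hit OOM on the runner:
# model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16)

# Workaround mentioned in the thread: load normally, then move to one device.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)

inputs = tokenizer("Hello today", return_tensors="pt").to(torch_device)
out = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
```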