BLOOM - modifying slow tests #17963
@@ -378,26 +378,26 @@ def test_model_from_pretrained(self):
     @require_torch_gpu
     def test_simple_generation(self):
         path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
+        model = BloomForCausalLM.from_pretrained(
+            path_350m, torch_dtype=torch.float32, use_cache=False, revision="gs555750"
+        ).cuda()
         model = model.eval()
         tokenizer = BloomTokenizerFast.from_pretrained(path_350m)

         input_sentence = "I enjoy walking with my cute dog"
-        EXPECTED_OUTPUT = (
-            "I enjoy walking with my cute dog, and I love to watch the kids play. I am a very active person, and I am"
-            " a very good listener. I am a very good person, and I am a very good person. I am a"
-        )
+        EXPECTED_OUTPUT = "I enjoy walking with my cute dog, and I love to watch the kids"

         input_ids = tokenizer.encode(input_sentence, return_tensors="pt")
-        greedy_output = model.generate(input_ids.cuda(), max_length=50)
+        greedy_output = model.generate(input_ids.cuda(), max_length=15)
         self.assertEqual(tokenizer.decode(greedy_output[0], skip_special_tokens=True), EXPECTED_OUTPUT)

     @slow
     @require_torch_gpu
     def test_batch_generation(self):
         path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
+        model = BloomForCausalLM.from_pretrained(
+            path_350m, torch_dtype="auto", use_cache=True, revision="gs555750"
Contributor: Why do we add a revision here?

Contributor (author): I think we don't need it there either; you are right.
+        ).cuda()
         model = model.eval()
         tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
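For context on the `revision` argument discussed above: it pins `from_pretrained` to a fixed commit of the Hub repository, so the hard-coded expected output keeps matching even if the checkpoint is later updated. A minimal sketch of the pinned-loading pattern (the checkpoint name and revision hash are taken from the diff; the rest is illustrative and runs on CPU):

```python
import torch
from transformers import BloomForCausalLM, BloomTokenizerFast

path_350m = "bigscience/bloom-350m"

# Pinning torch_dtype and revision keeps the greedy output reproducible even if
# the Hub checkpoint is updated later (revision hash copied from the diff above).
model = BloomForCausalLM.from_pretrained(
    path_350m, torch_dtype=torch.float32, use_cache=False, revision="gs555750"
).eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m)

input_ids = tokenizer.encode("I enjoy walking with my cute dog", return_tensors="pt")
greedy_output = model.generate(input_ids, max_length=15)
print(tokenizer.decode(greedy_output[0], skip_special_tokens=True))
```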
@@ -417,7 +417,9 @@ def test_batch_generation(self):
     @require_torch_gpu
     def test_batch_generation_padd(self):
         path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
+        model = BloomForCausalLM.from_pretrained(
+            path_350m, torch_dtype=torch.float32, use_cache=True, revision="gs555750"
+        ).cuda()
         model = model.eval()
         tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
@@ -428,7 +430,10 @@ def test_batch_generation_padd(self):
         input_ids_without_pad = tokenizer.encode(input_sentence_without_pad, return_tensors="pt")

         greedy_output = model.generate(
-            input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
+            input_ids["input_ids"].cuda(),
+            attention_mask=input_ids["attention_mask"].cuda(),
+            max_length=50,
+            do_sample=False,
         )
         greedy_output_without_pad = model.generate(input_ids_without_pad.cuda(), max_length=50, do_sample=False)
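The functional change in this hunk appears to be moving the attention mask onto the GPU along with the input ids, since the tensors passed to `generate` are expected to live on the model's device. A standalone sketch of that pattern, assuming a CUDA device is available (the prompt strings are illustrative):

```python
from transformers import BloomForCausalLM, BloomTokenizerFast

checkpoint = "bigscience/bloom-350m"  # same checkpoint as the tests above
model = BloomForCausalLM.from_pretrained(checkpoint).eval().cuda()
tokenizer = BloomTokenizerFast.from_pretrained(checkpoint, padding_side="left")

# Move every encoded tensor (input_ids and attention_mask) onto the model's
# device; mixing CPU and CUDA tensors in generate() can fail with a device error.
batch = tokenizer(["Hello my name is", "I enjoy walking with my cute dog"], return_tensors="pt", padding=True)
batch = {name: tensor.cuda() for name, tensor in batch.items()}

greedy_output = model.generate(**batch, max_length=50, do_sample=False)
print(tokenizer.batch_decode(greedy_output, skip_special_tokens=True))
```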
@@ -441,33 +446,6 @@ def test_batch_generation_padd(self):
             tokenizer.decode(greedy_output_without_pad[0, :-3], skip_special_tokens=True),
         )

-    @slow
-    def test_right_left_batched_input(self):
Contributor: Why would we delete this?

Contributor (author): Because we always use padding_side="left" for batched generation with autoregressive models, so there is no point in keeping this test.
-        path_1b3 = "bigscience/bloom-1b3"
-        model = BloomForCausalLM.from_pretrained(path_1b3, use_cache=True)
-        model = model.eval()
-
-        tokenizer = BloomTokenizerFast.from_pretrained(path_1b3)
-        tokenizer.padding_side = "right"
-
-        inputs = ["Hello there", "Joe Biden is the president of the"]
-        inputs_right = tokenizer(inputs, return_tensors="pt", padding=True)
-
-        tokenizer.padding_side = "left"
-        inputs_left = tokenizer(inputs, return_tensors="pt", padding=True)
-
-        # test token values are different
-        self.assertNotEqual(inputs_right["input_ids"].tolist(), inputs_left["input_ids"].tolist())
-
-        # test reconstructions are the same
-        outputs_right = model.generate(**inputs_right, max_length=10, do_sample=False)
-        outputs_left = model.generate(**inputs_left, max_length=10, do_sample=False)
-
-        self.assertEqual(
-            tokenizer.decode(outputs_right[0], skip_special_tokens=True),
-            tokenizer.decode(outputs_left[0], skip_special_tokens=True),
-        )
-
 @require_torch
 class BloomEmbeddingTest(unittest.TestCase):
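To illustrate the left-padding reasoning given in the discussion above, here is a short standalone sketch (not part of the PR; the checkpoint name is reused from the tests) showing how the two padding sides lay out a batch, and why only left padding lets a causal LM continue directly from the last real prompt token:

```python
from transformers import BloomTokenizerFast

tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-350m")
prompts = ["Hello there", "Joe Biden is the president of the"]

tokenizer.padding_side = "right"
inputs_right = tokenizer(prompts, return_tensors="pt", padding=True)

tokenizer.padding_side = "left"
inputs_left = tokenizer(prompts, return_tensors="pt", padding=True)

# With right padding the shorter prompt ends in pad tokens, so generation would
# continue from padding; with left padding the real tokens sit at the end of the
# row and greedy decoding continues straight from the prompt.
print(inputs_right["input_ids"][0])  # real tokens first, pad ids at the end
print(inputs_left["input_ids"][0])   # pad ids first, real tokens at the end
```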
Comment: Why not use the most up-to-date model here?