Merged
36 changes: 5 additions & 31 deletions tests/models/bloom/test_modeling_bloom.py
@@ -379,7 +379,8 @@ def test_model_from_pretrained(self):
def test_simple_generation(self):
# This test is a bit flaky. For some GPU architectures, pytorch sets by default allow_fp16_reduced_precision_reduction = True and some operations
# do not give the same results under this configuration, especially torch.baddmm and torch.bmm. https://pytorch.org/docs/stable/notes/numerical_accuracy.html#fp16-on-mi200
# We set allow_fp16_reduced_precision_reduction = True. Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
# As we leave the default value (True) for allow_fp16_reduced_precision_reduction, the tests fail when running in half-precision with smaller models (350m)
# Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
# This discrepancy is observed only when using small models; results seem to be stable for larger models.
# Our conclusion is that these operations are flaky for small inputs but seem to be stable for larger inputs (for the functions `baddmm` and `bmm`), and therefore for larger models.

@@ -396,7 +397,7 @@ def test_simple_generation(self):
# >=760m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
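
(Not part of the diff.) As a hedged sketch of the switch the comment above refers to: torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction is the documented PyTorch flag, and disabling it forces fp16 GEMMs to accumulate in full precision; the tensor shapes below are illustrative only.

import torch

# Default is True; when enabled, fp16 matmuls such as torch.bmm may accumulate in
# reduced precision on some GPU architectures (see the PyTorch notes linked above),
# which is where the small-model flakiness shows up.
torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False

if torch.cuda.is_available():
    a = torch.randn(1, 128, 256, dtype=torch.float16, device="cuda")
    b = torch.randn(1, 256, 64, dtype=torch.float16, device="cuda")
    out = torch.bmm(a, b)  # reductions now accumulate in fp32 before casting back to fp16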

path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
Member:
What does the revision argument do? Are we sure we want to add that?

Contributor Author:
Yes, this argument loads the model from the revision we were initially using when designing those tests: https://huggingface.co/bigscience/bloom-350m/tree/gs555750
@Muennighoff recently wanted to push the newest weights of 350m, so adding the revision flag should prevent us from breaking those tests.
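
(Aside, not part of the thread.) A minimal sketch of what the flag does, using only identifiers already present in this diff: revision pins from_pretrained to a specific branch, tag, or commit on the Hub instead of the default main branch.

from transformers import BloomForCausalLM

# Resolves the weights from https://huggingface.co/bigscience/bloom-350m/tree/gs555750,
# so later pushes to the repo's main branch cannot change what the slow tests load.
model = BloomForCausalLM.from_pretrained(
    "bigscience/bloom-350m", use_cache=True, revision="gs555750"
)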

model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m)

@@ -416,7 +417,7 @@ def test_simple_generation(self):
@require_torch_gpu
def test_batch_generation(self):
path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")

@@ -437,7 +438,7 @@ def test_batch_generation(self):
def test_batch_generation_padd(self):

path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")

@@ -461,33 +462,6 @@ def test_batch_generation_padd(self):
tokenizer.decode(greedy_output_without_pad[0, :-3], skip_special_tokens=True),
)

@slow
Collaborator:

Could you just explain why this test is removed, please?

Contributor Author:

Following a discussion we had on Slack, it appears that one should always use padding_side=left.
Here is a link that quickly summarizes the issue: #17963 (comment)

Collaborator:

Thanks for clarifying!

def test_right_left_batched_input(self):
path_1b3 = "bigscience/bloom-1b3"
model = BloomForCausalLM.from_pretrained(path_1b3, use_cache=True)
model = model.eval()

tokenizer = BloomTokenizerFast.from_pretrained(path_1b3)
tokenizer.padding_side = "right"

inputs = ["Hello there", "Joe Biden is the president of the"]
inputs_right = tokenizer(inputs, return_tensors="pt", padding=True)

tokenizer.padding_side = "left"
inputs_left = tokenizer(inputs, return_tensors="pt", padding=True)

# test token values are different
self.assertNotEqual(inputs_right["input_ids"].tolist(), inputs_left["input_ids"].tolist())

# test reconstructions are the same
outputs_right = model.generate(**inputs_right, max_length=10, do_sample=False)
outputs_left = model.generate(**inputs_left, max_length=10, do_sample=False)

self.assertEqual(
tokenizer.decode(outputs_right[0], skip_special_tokens=True),
tokenizer.decode(outputs_left[0], skip_special_tokens=True),
)
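
(Aside, not part of the PR.) A hedged sketch of the left-padding recommendation from the thread above, reusing the checkpoint and prompts of the removed test; the decoding step is illustrative.

import torch
from transformers import BloomForCausalLM, BloomTokenizerFast

path_1b3 = "bigscience/bloom-1b3"
model = BloomForCausalLM.from_pretrained(path_1b3, use_cache=True).eval()

# Decoder-only models are padded on the left so that prompt tokens sit directly
# before the tokens being generated.
tokenizer = BloomTokenizerFast.from_pretrained(path_1b3, padding_side="left")

inputs = tokenizer(
    ["Hello there", "Joe Biden is the president of the"],
    return_tensors="pt",
    padding=True,
)
with torch.no_grad():
    outputs = model.generate(**inputs, max_length=10, do_sample=False)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))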


@require_torch
class BloomEmbeddingTest(unittest.TestCase):