52 changes: 15 additions & 37 deletions tests/models/bloom/test_modeling_bloom.py
@@ -378,26 +378,26 @@ def test_model_from_pretrained(self):
@require_torch_gpu
def test_simple_generation(self):
path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(
path_350m, torch_dtype=torch.float32, use_cache=False, revision="gs555750"
Contributor: Why not use the most up-to-date model here?
).cuda()
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m)

input_sentence = "I enjoy walking with my cute dog"
EXPECTED_OUTPUT = (
"I enjoy walking with my cute dog, and I love to watch the kids play. I am a very active person, and I am"
" a very good listener. I am a very good person, and I am a very good person. I am a"
)
EXPECTED_OUTPUT = "I enjoy walking with my cute dog, and I love to watch the kids"

input_ids = tokenizer.encode(input_sentence, return_tensors="pt")
greedy_output = model.generate(input_ids.cuda(), max_length=50)

greedy_output = model.generate(input_ids.cuda(), max_length=15)
self.assertEqual(tokenizer.decode(greedy_output[0], skip_special_tokens=True), EXPECTED_OUTPUT)

@slow
@require_torch_gpu
def test_batch_generation(self):
path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(
path_350m, torch_dtype="auto", use_cache=True, revision="gs555750"
Contributor: Why do we add a revision here?
Contributor Author: I think we don't need it there either, you are right.
).cuda()
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")

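For context on the revision question in the thread above, here is a minimal sketch (not part of this diff) of what the argument does when loading from the Hub; the checkpoint path and revision string are the ones used in the test:

```python
import torch
from transformers import BloomForCausalLM

path_350m = "bigscience/bloom-350m"

# Without `revision`, from_pretrained resolves to the latest commit on the model
# repo's default branch; pinning revision="gs555750" fixes the exact checkpoint,
# so later pushes to the repo cannot silently change the test's expected outputs.
model = BloomForCausalLM.from_pretrained(
    path_350m, torch_dtype=torch.float32, use_cache=True, revision="gs555750"
)
```
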
@@ -417,7 +417,9 @@ def test_batch_generation(self):
@require_torch_gpu
def test_batch_generation_padd(self):
path_350m = "bigscience/bloom-350m"
model = BloomForCausalLM.from_pretrained(path_350m, torch_dtype="auto", use_cache=True).cuda()
model = BloomForCausalLM.from_pretrained(
path_350m, torch_dtype=torch.float32, use_cache=True, revision="gs555750"
).cuda()
model = model.eval()
tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")

@@ -428,7 +430,10 @@ def test_batch_generation_padd(self):
input_ids_without_pad = tokenizer.encode(input_sentence_without_pad, return_tensors="pt")

greedy_output = model.generate(
input_ids["input_ids"].cuda(), attention_mask=input_ids["attention_mask"], max_length=50, do_sample=False
input_ids["input_ids"].cuda(),
attention_mask=input_ids["attention_mask"].cuda(),
max_length=50,
do_sample=False,
)
greedy_output_without_pad = model.generate(input_ids_without_pad.cuda(), max_length=50, do_sample=False)

@@ -441,33 +446,6 @@ def test_batch_generation_padd(self):
tokenizer.decode(greedy_output_without_pad[0, :-3], skip_special_tokens=True),
)

@slow
def test_right_left_batched_input(self):
Contributor: Why would we delete this?
Contributor Author: Because we always use padding_side="left" for batched generation with autoregressive models, so for me there is no point in keeping this test.

path_1b3 = "bigscience/bloom-1b3"
model = BloomForCausalLM.from_pretrained(path_1b3, use_cache=True)
model = model.eval()

tokenizer = BloomTokenizerFast.from_pretrained(path_1b3)
tokenizer.padding_side = "right"

inputs = ["Hello there", "Joe Biden is the president of the"]
inputs_right = tokenizer(inputs, return_tensors="pt", padding=True)

tokenizer.padding_side = "left"
inputs_left = tokenizer(inputs, return_tensors="pt", padding=True)

# test token values are different
self.assertNotEqual(inputs_right["input_ids"].tolist(), inputs_left["input_ids"].tolist())

# test reconstructions are the same
outputs_right = model.generate(**inputs_right, max_length=10, do_sample=False)
outputs_left = model.generate(**inputs_left, max_length=10, do_sample=False)

self.assertEqual(
tokenizer.decode(outputs_right[0], skip_special_tokens=True),
tokenizer.decode(outputs_left[0], skip_special_tokens=True),
)
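
The deleted test above compared right- and left-padded batched inputs. As the author notes in the thread, batched generation with autoregressive (decoder-only) models is expected to use padding_side="left", since right padding would leave pad tokens between the prompt and the newly generated tokens. A minimal sketch of the recommended usage, assuming the smaller bloom-350m checkpoint instead of bloom-1b3 to keep it light:

```python
# Minimal sketch, not part of the diff: left-padded batched generation with a
# decoder-only model. Left padding keeps every prompt flush against the tokens
# the model generates, so shorter prompts in the batch decode cleanly.
from transformers import BloomForCausalLM, BloomTokenizerFast

model_name = "bigscience/bloom-350m"  # smaller checkpoint, chosen for illustration
model = BloomForCausalLM.from_pretrained(model_name).eval()
tokenizer = BloomTokenizerFast.from_pretrained(model_name, padding_side="left")

prompts = ["Hello there", "Joe Biden is the president of the"]
inputs = tokenizer(prompts, return_tensors="pt", padding=True)

outputs = model.generate(**inputs, max_new_tokens=10, do_sample=False)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```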


@require_torch
class BloomEmbeddingTest(unittest.TestCase):