Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions tests/models/opt/test_modeling_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

from transformers import OPTConfig, is_torch_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from transformers.utils import cached_property

from ...generation.test_generation_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester
Expand Down Expand Up @@ -270,10 +269,6 @@ def _long_tensor(tok_lst):
@require_sentencepiece
@require_tokenizers
class OPTModelIntegrationTests(unittest.TestCase):
@cached_property
def default_tokenizer(self):
return GPT2Tokenizer.from_pretrained("patrickvonplaten/opt_gpt2_tokenizer")

@slow
def test_inference_no_head(self):
model = OPTModel.from_pretrained("facebook/opt-350m").to(torch_device)
Expand All @@ -284,7 +279,7 @@ def test_inference_no_head(self):
expected_shape = torch.Size((1, 11, 512))
self.assertEqual(output.shape, expected_shape)
expected_slice = torch.tensor(
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
[[-0.2873, -1.9218, -0.3033], [-1.2710, -0.1338, -0.1902], [0.4095, 0.1214, -1.3121]], device=torch_device
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why the outputs differ now, but the model behaves correctly

)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))

Expand All @@ -307,16 +302,16 @@ def test_logits(self):
model = OPTForCausalLM.from_pretrained(self.path_model)
model = model.eval()
tokenizer = GPT2Tokenizer.from_pretrained(self.path_model)
tokenizer.add_special_tokens({"pad_token": "<pad>"})

prompts = [
"Today is a beautiful day and I want to",
"In the city of",
"Paris is the capital of France and",
"Computers and mobile phones have taken",
]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# following metase, the BOS token is not added

input_ids = tokenizer(prompts, return_tensors="pt", padding=True).input_ids
logits = model(input_ids)[0].mean(dim=-1)
# verify that prompt without BOS token is identical to Metaseq -> add_special_tokens=False
inputs = tokenizer(prompts, return_tensors="pt", padding=True, add_special_tokens=False)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to be identical to metaseq let's not append the BOS token

Copy link
Collaborator

@ydshieh ydshieh May 16, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As usual, I would like very much this comment is added to the code :-)

i.e. to be identical to metaseq let's not append the BOS token

logits = model(inputs.input_ids, attention_mask=inputs.attention_mask)[0].mean(dim=-1)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that we don't automatically create an attention_mask but always require the user to pass it @younesbelkada @ArthurZucker

# logits_meta = torch.load(self.path_logits_meta)
logits_meta = torch.Tensor(
[
Expand All @@ -326,7 +321,6 @@ def test_logits(self):
[6.4783, -1.9913, -10.7926, -2.3336, 1.5092, -0.9974, -6.8213, 1.3477, 1.3477],
]
)

assert torch.allclose(logits, logits_meta, atol=1e-4)


Expand Down