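These merged changes standardize the PyTorch imports across the test files: `import torch.nn as nn`, `import torch.nn.functional as F`, and fully qualified `torch.nn.*` references are replaced by a single `from torch import nn`, with functional ops reached through `nn.functional`. A minimal sketch of the resulting convention (the tensors and layer sizes here are illustrative, not taken from the tests):

```python
import torch
from torch import nn  # single nn import used throughout the tests

# functional ops go through nn.functional instead of `import torch.nn.functional as F`
scores = torch.randn(2, 8)                      # illustrative logits
probs = nn.functional.softmax(scores, dim=-1)   # replaces F.softmax(scores, dim=-1)

# modules and parameters drop the torch. prefix
layer = nn.Linear(8, 4)                         # replaces torch.nn.Linear(8, 4)
layer.weight = nn.Parameter(torch.zeros(4, 8))  # replaces torch.nn.Parameter(...)
```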
8 changes: 4 additions & 4 deletions tests/test_generation_logits_process.py
@@ -24,7 +24,7 @@

if is_torch_available():
import torch
-import torch.nn.functional as F
+from torch import nn

from transformers.generation_logits_process import (
EncoderNoRepeatNGramLogitsProcessor,
@@ -80,13 +80,13 @@ def test_temperature_dist_warper(self):
scores[1, 10] = (1 / length) - 0.4 # valley, 1st batch

# compute softmax
-probs = F.softmax(scores, dim=-1)
+probs = nn.functional.softmax(scores, dim=-1)

temp_dist_warper_sharper = TemperatureLogitsWarper(temperature=0.5)
temp_dist_warper_smoother = TemperatureLogitsWarper(temperature=1.3)

-warped_prob_sharp = F.softmax(temp_dist_warper_sharper(input_ids, scores.clone()), dim=-1)
-warped_prob_smooth = F.softmax(temp_dist_warper_smoother(input_ids, scores.clone()), dim=-1)
+warped_prob_sharp = nn.functional.softmax(temp_dist_warper_sharper(input_ids, scores.clone()), dim=-1)
+warped_prob_smooth = nn.functional.softmax(temp_dist_warper_smoother(input_ids, scores.clone()), dim=-1)

# uniform distribution stays uniform
self.assertTrue(torch.allclose(probs[0, :], warped_prob_sharp[0, :], atol=1e-3))
5 changes: 3 additions & 2 deletions tests/test_modeling_clip.py
@@ -30,6 +30,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import CLIPConfig, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel
from transformers.models.clip.modeling_clip import CLIP_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -140,9 +141,9 @@ def test_model_common_attributes(self):

for model_class in self.all_model_classes:
model = model_class(config)
-self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+self.assertTrue(x is None or isinstance(x, nn.Linear))

def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
9 changes: 5 additions & 4 deletions tests/test_modeling_common.py
@@ -44,6 +44,7 @@
if is_torch_available():
import numpy as np
import torch
+from torch import nn

from transformers import (
BERT_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -1150,10 +1151,10 @@ def test_model_common_attributes(self):

for model_class in self.all_model_classes:
model = model_class(config)
-self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding, AdaptiveEmbedding))
-model.set_input_embeddings(torch.nn.Embedding(10, 10))
+self.assertIsInstance(model.get_input_embeddings(), (nn.Embedding, AdaptiveEmbedding))
+model.set_input_embeddings(nn.Embedding(10, 10))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+self.assertTrue(x is None or isinstance(x, nn.Linear))

def test_correct_missing_keys(self):
if not self.test_missing_keys:
@@ -1337,7 +1338,7 @@ def test_multi_gpu_data_parallel_forward(self):
model.eval()

# Wrap model in nn.DataParallel
-model = torch.nn.DataParallel(model)
+model = nn.DataParallel(model)
with torch.no_grad():
_ = model(**self._prepare_for_class(inputs_dict, model_class))

5 changes: 3 additions & 2 deletions tests/test_modeling_deit.py
@@ -27,6 +27,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import (
MODEL_MAPPING,
@@ -176,9 +177,9 @@ def test_model_common_attributes(self):

for model_class in self.all_model_classes:
model = model_class(config)
-self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+self.assertTrue(x is None or isinstance(x, nn.Linear))

def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
7 changes: 4 additions & 3 deletions tests/test_modeling_fsmt.py
@@ -30,6 +30,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer
from transformers.models.fsmt.modeling_fsmt import (
@@ -160,10 +161,10 @@ def test_model_common_attributes(self):

for model_class in self.all_model_classes:
model = model_class(config)
-self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding))
-model.set_input_embeddings(torch.nn.Embedding(10, 10))
+self.assertIsInstance(model.get_input_embeddings(), (nn.Embedding))
+model.set_input_embeddings(nn.Embedding(10, 10))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.modules.sparse.Embedding))
+self.assertTrue(x is None or isinstance(x, nn.modules.sparse.Embedding))

def test_initialization_more(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
34 changes: 17 additions & 17 deletions tests/test_modeling_ibert.py
@@ -26,7 +26,7 @@

if is_torch_available():
import torch
-import torch.nn as nn
+from torch import nn

from transformers import (
IBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -304,9 +304,9 @@ def test_model_common_attributes(self):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), QuantEmbedding)
-model.set_input_embeddings(torch.nn.Embedding(10, 10))
+model.set_input_embeddings(nn.Embedding(10, 10))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+self.assertTrue(x is None or isinstance(x, nn.Linear))

# Override
def test_feed_forward_chunking(self):
@@ -350,7 +350,7 @@ def test_quant_embedding(self):
weight_bit = 8
embedding = QuantEmbedding(2, 4, quant_mode=True, weight_bit=weight_bit)
embedding_weight = torch.tensor([[-1.0, -2.0, -3.0, -4.0], [5.0, 6.0, 7.0, 8.0]])
-embedding.weight = torch.nn.Parameter(embedding_weight)
+embedding.weight = nn.Parameter(embedding_weight)

expected_scaling_factor = embedding_weight.abs().max() / (2 ** (weight_bit - 1) - 1)
x, x_scaling_factor = embedding(torch.tensor(0))
@@ -447,8 +447,8 @@ def _test(per_channel):
linear_q = QuantLinear(2, 4, quant_mode=True, per_channel=per_channel, weight_bit=weight_bit)
linear_dq = QuantLinear(2, 4, quant_mode=False, per_channel=per_channel, weight_bit=weight_bit)
linear_weight = torch.tensor([[-1.0, 2.0, 3.0, -4.0], [5.0, -6.0, -7.0, 8.0]]).T
-linear_q.weight = torch.nn.Parameter(linear_weight)
-linear_dq.weight = torch.nn.Parameter(linear_weight)
+linear_q.weight = nn.Parameter(linear_weight)
+linear_dq.weight = nn.Parameter(linear_weight)

q, q_scaling_factor = linear_q(x, x_scaling_factor)
q_int = q / q_scaling_factor
@@ -477,7 +477,7 @@ def _test(per_channel):

def test_int_gelu(self):
gelu_q = IntGELU(quant_mode=True)
-gelu_dq = torch.nn.GELU()
+gelu_dq = nn.GELU()

x_int = torch.range(-10000, 10000, 1)
x_scaling_factor = torch.tensor(0.001)
@@ -523,7 +523,7 @@ def test_force_dequant_gelu(self):
def test_int_softmax(self):
output_bit = 8
softmax_q = IntSoftmax(output_bit, quant_mode=True)
-softmax_dq = torch.nn.Softmax()
+softmax_dq = nn.Softmax()

# x_int = torch.range(-10000, 10000, 1)
def _test(array):
@@ -590,12 +590,12 @@ def test_int_layernorm(self):
x = x_int * x_scaling_factor

ln_q = IntLayerNorm(x.shape[1:], 1e-5, quant_mode=True, output_bit=output_bit)
-ln_dq = torch.nn.LayerNorm(x.shape[1:], 1e-5)
+ln_dq = nn.LayerNorm(x.shape[1:], 1e-5)

-ln_q.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-ln_q.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
-ln_dq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-ln_dq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+ln_q.weight = nn.Parameter(torch.ones(x.shape[1:]))
+ln_q.bias = nn.Parameter(torch.ones(x.shape[1:]))
+ln_dq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+ln_dq.bias = nn.Parameter(torch.ones(x.shape[1:]))

q, q_scaling_factor = ln_q(x, x_scaling_factor)
q_int = q / q_scaling_factor
@@ -627,13 +627,13 @@ def test_force_dequant_layernorm(self):
],
}

-ln_dq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-ln_dq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+ln_dq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+ln_dq.bias = nn.Parameter(torch.ones(x.shape[1:]))
dq, dq_scaling_factor = ln_dq(x, x_scaling_factor)
for label, ln_fdqs in ln_fdqs_dict.items():
for ln_fdq in ln_fdqs:
-ln_fdq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-ln_fdq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+ln_fdq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+ln_fdq.bias = nn.Parameter(torch.ones(x.shape[1:]))
q, q_scaling_factor = ln_fdq(x, x_scaling_factor)
if label:
self.assertTrue(torch.allclose(q, dq, atol=1e-4))
3 changes: 2 additions & 1 deletion tests/test_modeling_reformer.py
@@ -32,6 +32,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import (
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -241,7 +242,7 @@ def create_and_check_reformer_model_with_attn_mask(
# set all position encodings to zero so that positions don't matter
with torch.no_grad():
embedding = model.embeddings.position_embeddings.embedding
-embedding.weight = torch.nn.Parameter(torch.zeros(embedding.weight.shape).to(torch_device))
+embedding.weight = nn.Parameter(torch.zeros(embedding.weight.shape).to(torch_device))
embedding.weight.requires_grad = False

half_seq_len = self.seq_length // 2
5 changes: 3 additions & 2 deletions tests/test_modeling_transfo_xl.py
@@ -27,6 +27,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import TransfoXLConfig, TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel
from transformers.models.transfo_xl.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -362,11 +363,11 @@ def _mock_init_weights(self, module):
if hasattr(module, "emb_projs"):
for i in range(len(module.emb_projs)):
if module.emb_projs[i] is not None:
-torch.nn.init.constant_(module.emb_projs[i], 0.0003)
+nn.init.constant_(module.emb_projs[i], 0.0003)
if hasattr(module, "out_projs"):
for i in range(len(module.out_projs)):
if module.out_projs[i] is not None:
-torch.nn.init.constant_(module.out_projs[i], 0.0003)
+nn.init.constant_(module.out_projs[i], 0.0003)

for param in ["r_emb", "r_w_bias", "r_r_bias", "r_bias"]:
if hasattr(module, param) and getattr(module, param) is not None:
5 changes: 3 additions & 2 deletions tests/test_modeling_vit.py
@@ -27,6 +27,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import ViTConfig, ViTForImageClassification, ViTModel
from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple
@@ -169,9 +170,9 @@ def test_model_common_attributes(self):

for model_class in self.all_model_classes:
model = model_class(config)
-self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
x = model.get_output_embeddings()
-self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+self.assertTrue(x is None or isinstance(x, nn.Linear))

def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
7 changes: 4 additions & 3 deletions tests/test_optimization.py
@@ -24,6 +24,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers import (
Adafactor,
@@ -70,7 +71,7 @@ def assertListAlmostEqual(self, list1, list2, tol):
def test_adam_w(self):
w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
target = torch.tensor([0.4, 0.2, -0.5])
-criterion = torch.nn.MSELoss()
+criterion = nn.MSELoss()
# No warmup, constant schedule, no gradient clipping
optimizer = AdamW(params=[w], lr=2e-1, weight_decay=0.0)
for _ in range(100):
@@ -84,7 +85,7 @@ def test_adam_w(self):
def test_adafactor(self):
w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
target = torch.tensor([0.4, 0.2, -0.5])
-criterion = torch.nn.MSELoss()
+criterion = nn.MSELoss()
# No warmup, constant schedule, no gradient clipping
optimizer = Adafactor(
params=[w],
@@ -109,7 +110,7 @@ def test_adafactor(self):

@require_torch
class ScheduleInitTest(unittest.TestCase):
-m = torch.nn.Linear(50, 50) if is_torch_available() else None
+m = nn.Linear(50, 50) if is_torch_available() else None
optimizer = AdamW(m.parameters(), lr=10.0) if is_torch_available() else None
num_steps = 10

5 changes: 3 additions & 2 deletions tests/test_pipelines_conversational.py
@@ -32,6 +32,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel

@@ -59,8 +60,8 @@ def get_pipeline(self):
bias[76] = 1
weight = torch.zeros((V, D), requires_grad=True)

-model.lm_head.bias = torch.nn.Parameter(bias)
-model.lm_head.weight = torch.nn.Parameter(weight)
+model.lm_head.bias = nn.Parameter(bias)
+model.lm_head.weight = nn.Parameter(weight)

# # Created with:
# import tempfile
3 changes: 2 additions & 1 deletion tests/test_pipelines_summarization.py
@@ -23,6 +23,7 @@

if is_torch_available():
import torch
+from torch import nn

from transformers.models.bart import BartConfig, BartForConditionalGeneration

@@ -55,7 +56,7 @@ def test_input_too_long(self):
bias = torch.zeros(V)
bias[76] = 10

-model.lm_head.bias = torch.nn.Parameter(bias)
+model.lm_head.bias = nn.Parameter(bias)

# # Generated with:
# import tempfile