2 changes: 1 addition & 1 deletion examples/research_projects/wav2vec2/run_asr.py
@@ -30,7 +30,7 @@
if is_apex_available():
from apex import amp

if version.parse(torch.__version__) >= version.parse("1.6"):
if version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.6"):
_is_native_amp_available = True
from torch.cuda.amp import autocast

2 changes: 1 addition & 1 deletion examples/research_projects/wav2vec2/run_common_voice.py
@@ -33,7 +33,7 @@
from apex import amp


if version.parse(torch.__version__) >= version.parse("1.6"):
if version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.6"):
_is_native_amp_available = True
from torch.cuda.amp import autocast

2 changes: 1 addition & 1 deletion examples/research_projects/wav2vec2/run_pretrain.py
@@ -26,7 +26,7 @@
if is_apex_available():
from apex import amp

if version.parse(torch.__version__) >= version.parse("1.6"):
if version.parse(version.parse(torch.__version__).base_version) >= version.parse("1.6"):
_is_native_amp_available = True
from torch.cuda.amp import autocast

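Note on the change in the three wav2vec2 example scripts above: the old check compared `torch.__version__` directly, while the new one compares its `base_version`. Under PEP 440, nightly and local builds sort *before* the final release they lead up to, so the direct comparison can wrongly report native AMP as unavailable on a build that actually supports it. A minimal illustration with a made-up version string (this snippet is an explanatory aside, not part of the diff):

```python
from packaging import version

nightly = "1.6.0.dev20200520+cu101"  # hypothetical nightly/local build string

direct = version.parse(nightly)
stripped = version.parse(version.parse(nightly).base_version)  # parses "1.6.0"

print(direct >= version.parse("1.6"))    # False: .devN pre-releases sort below the release
print(stripped >= version.parse("1.6"))  # True: base_version drops the dev/local suffix
```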
6 changes: 3 additions & 3 deletions src/transformers/activations.py
@@ -44,7 +44,7 @@ class GELUActivation(nn.Module):

def __init__(self, use_gelu_python: bool = False):
super().__init__()
if version.parse(torch.__version__) < version.parse("1.4") or use_gelu_python:
if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.4") or use_gelu_python:
self.act = self._gelu_python
else:
self.act = nn.functional.gelu
@@ -110,7 +110,7 @@ class SiLUActivation(nn.Module):

def __init__(self):
super().__init__()
if version.parse(torch.__version__) < version.parse("1.7"):
if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.7"):
self.act = self._silu_python
else:
self.act = nn.functional.silu
@@ -130,7 +130,7 @@ class MishActivation(nn.Module):

def __init__(self):
super().__init__()
if version.parse(torch.__version__) < version.parse("1.9"):
if version.parse(version.parse(torch.__version__).base_version) < version.parse("1.9"):
self.act = self._mish_python
else:
self.act = nn.functional.mish
4 changes: 3 additions & 1 deletion src/transformers/convert_graph_to_onnx.py
@@ -273,6 +273,8 @@ def convert_pytorch(nlp: Pipeline, opset: int, output: Path, use_external_format
import torch
from torch.onnx import export

from .pytorch_utils import is_torch_less_than_1_11

print(f"Using framework PyTorch: {torch.__version__}")

with torch.no_grad():
@@ -281,7 +283,7 @@

# PyTorch deprecated the `enable_onnx_checker` and `use_external_data_format` arguments in v1.11,
# so we check the torch version for backwards compatibility
if parse(torch.__version__) <= parse("1.10.99"):
if is_torch_less_than_1_11:
export(
nlp.model,
model_args,
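From here on, the files stop repeating the version comparison inline and instead import ready-made booleans (`is_torch_less_than_1_11`, `is_torch_greater_than_1_6`, `is_torch_greater_or_equal_than_1_6`) from `transformers.pytorch_utils`. The diff of `pytorch_utils.py` itself is not shown in this view, so the following is only a sketch of how those flags are presumably defined there, following the `base_version` pattern used in the example scripts:

```python
# Assumed contents of src/transformers/pytorch_utils.py (not shown in this diff view)
import torch
from packaging import version

parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)

is_torch_greater_or_equal_than_1_6 = parsed_torch_version_base >= version.parse("1.6")
is_torch_greater_than_1_6 = parsed_torch_version_base > version.parse("1.6")
is_torch_less_than_1_11 = parsed_torch_version_base < version.parse("1.11")
```

Centralizing the flags means the nightly-build caveat described above only has to be handled in one place.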
10 changes: 7 additions & 3 deletions src/transformers/models/albert/modeling_albert.py
@@ -20,7 +20,6 @@
from typing import Dict, List, Optional, Tuple, Union

import torch
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -35,7 +34,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import (
ModelOutput,
add_code_sample_docstrings,
@@ -212,7 +216,7 @@ def __init__(self, config: AlbertConfig):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
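In `modeling_albert.py` (and the model files below), the same flag guards the registration of a `token_type_ids` buffer. The hunk is cut off here, but the `distilbert` and `flaubert` hunks further down show the same guard around a registration that passes `persistent=False`, and `register_buffer` only accepts that keyword from PyTorch 1.6 on; presumably the `token_type_ids` registration continues with `persistent=False` as well. A minimal sketch of the pattern, with a hypothetical class name and default size, assuming the post-PR import path:

```python
import torch
from torch import nn

from transformers.pytorch_utils import is_torch_greater_than_1_6  # available after this PR

class EmbeddingsSketch(nn.Module):  # hypothetical, for illustration only
    def __init__(self, max_position_embeddings: int = 512):
        super().__init__()
        self.register_buffer("position_ids", torch.arange(max_position_embeddings).expand((1, -1)))
        if is_torch_greater_than_1_6:
            # persistent=False keeps the buffer out of the state_dict, so older
            # checkpoints without it still load; the kwarg requires torch >= 1.6
            self.register_buffer(
                "token_type_ids",
                torch.zeros(self.position_ids.size(), dtype=torch.long),
                persistent=False,
            )
```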
10 changes: 7 additions & 3 deletions src/transformers/models/bert/modeling_bert.py
@@ -24,7 +24,6 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -41,7 +40,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import (
ModelOutput,
add_code_sample_docstrings,
@@ -195,7 +199,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
5 changes: 2 additions & 3 deletions src/transformers/models/big_bird/modeling_big_bird.py
@@ -23,7 +23,6 @@
import numpy as np
import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -38,7 +37,7 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward
from ...pytorch_utils import apply_chunking_to_forward, is_torch_greater_than_1_6
from ...utils import (
ModelOutput,
add_code_sample_docstrings,
@@ -260,7 +259,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
10 changes: 7 additions & 3 deletions src/transformers/models/convbert/modeling_convbert.py
@@ -22,7 +22,6 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -36,7 +35,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel, SequenceSummary
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from .configuration_convbert import ConvBertConfig

@@ -194,7 +198,7 @@ def __init__(self, config):
self.dropout = nn.Dropout(config.hidden_dropout_prob)
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
10 changes: 7 additions & 3 deletions src/transformers/models/data2vec/modeling_data2vec_text.py
@@ -19,7 +19,6 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -35,7 +34,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import (
add_code_sample_docstrings,
add_start_docstrings,
@@ -83,7 +87,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
@@ -21,12 +21,16 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn

from ...activations import ACT2FN
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import Conv1D, find_pruneable_heads_and_indices, prune_conv1d_layer
from ...pytorch_utils import (
Conv1D,
find_pruneable_heads_and_indices,
is_torch_greater_or_equal_than_1_6,
prune_conv1d_layer,
)
from ...utils import (
ModelOutput,
add_start_docstrings,
@@ -36,7 +40,7 @@
)


if version.parse(torch.__version__) >= version.parse("1.6"):
if is_torch_greater_or_equal_than_1_6:
is_amp_available = True
from torch.cuda.amp import autocast
else:
10 changes: 7 additions & 3 deletions src/transformers/models/distilbert/modeling_distilbert.py
@@ -23,7 +23,6 @@

import numpy as np
import torch
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -40,7 +39,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import (
add_code_sample_docstrings,
add_start_docstrings,
@@ -102,7 +106,7 @@ def __init__(self, config: PretrainedConfig):

self.LayerNorm = nn.LayerNorm(config.dim, eps=1e-12)
self.dropout = nn.Dropout(config.dropout)
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
)
10 changes: 7 additions & 3 deletions src/transformers/models/electra/modeling_electra.py
@@ -21,7 +21,6 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -37,7 +36,12 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel, SequenceSummary
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import (
apply_chunking_to_forward,
find_pruneable_heads_and_indices,
is_torch_greater_than_1_6,
prune_linear_layer,
)
from ...utils import (
ModelOutput,
add_code_sample_docstrings,
@@ -165,7 +169,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
4 changes: 2 additions & 2 deletions src/transformers/models/flaubert/modeling_flaubert.py
@@ -19,10 +19,10 @@
from typing import Dict, Optional, Tuple, Union

import torch
from packaging import version
from torch import nn

from ...modeling_outputs import BaseModelOutput
from ...pytorch_utils import is_torch_greater_than_1_6
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from ..xlm.modeling_xlm import (
XLMForMultipleChoice,
@@ -139,7 +139,7 @@ def __init__(self, config): # , dico, is_encoder, with_output):
super().__init__(config)
self.layerdrop = getattr(config, "layerdrop", 0.0)
self.pre_norm = getattr(config, "pre_norm", False)
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)), persistent=False
)
4 changes: 2 additions & 2 deletions src/transformers/models/flava/modeling_flava.py
@@ -22,14 +22,14 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn

from transformers.utils.doc import add_code_sample_docstrings

from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithPooling
from ...modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer
from ...pytorch_utils import is_torch_greater_than_1_6
from ...utils import (
ModelOutput,
add_start_docstrings,
@@ -392,7 +392,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.position_embedding_type = getattr(config, "position_embedding_type", "absolute")
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))
if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),
5 changes: 2 additions & 3 deletions src/transformers/models/fnet/modeling_fnet.py
@@ -21,7 +21,6 @@

import torch
import torch.utils.checkpoint
from packaging import version
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

@@ -44,7 +43,7 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward
from ...pytorch_utils import apply_chunking_to_forward, is_torch_greater_than_1_6
from ...utils import (
add_code_sample_docstrings,
add_start_docstrings,
@@ -118,7 +117,7 @@ def __init__(self, config):
# position_ids (1, len position emb) is contiguous in memory and exported when serialized
self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1)))

if version.parse(torch.__version__) > version.parse("1.6.0"):
if is_torch_greater_than_1_6:
self.register_buffer(
"token_type_ids",
torch.zeros(self.position_ids.size(), dtype=torch.long),