Skip to content
2 changes: 0 additions & 2 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@
"LUKE_PRETRAINED_MODEL_ARCHIVE_LIST",
"LukeLayer",
"LukeModel",
"LukeEntityAwareAttentionModel",
"LukePreTrainedModel",
"LukeForEntityClassification",
"LukeForEntityPairClassification",
Expand Down Expand Up @@ -1734,7 +1733,6 @@
)
from .models.luke import (
LUKE_PRETRAINED_MODEL_ARCHIVE_LIST,
LukeEntityAwareAttentionModel,
LukeForEntityTyping,
LukeLayer,
LukeModel,
Expand Down
2 changes: 1 addition & 1 deletion src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
)

# Add modeling imports here
from ..luke.modeling_luke import LukeEntityAwareAttentionModel, LukeModel
from ..luke.modeling_luke import LukeModel
from ..lxmert.modeling_lxmert import LxmertForPreTraining, LxmertForQuestionAnswering, LxmertModel
from ..marian.modeling_marian import MarianForCausalLM, MarianModel, MarianMTModel
from ..mbart.modeling_mbart import (
Expand Down
3 changes: 3 additions & 0 deletions src/transformers/models/auto/tokenization_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from ..layoutlm.tokenization_layoutlm import LayoutLMTokenizer
from ..led.tokenization_led import LEDTokenizer
from ..longformer.tokenization_longformer import LongformerTokenizer
from ..luke.tokenization_luke import LukeTokenizer
from ..lxmert.tokenization_lxmert import LxmertTokenizer
from ..mobilebert.tokenization_mobilebert import MobileBertTokenizer
from ..mpnet.tokenization_mpnet import MPNetTokenizer
Expand Down Expand Up @@ -78,6 +79,7 @@
LayoutLMConfig,
LEDConfig,
LongformerConfig,
LukeConfig,
LxmertConfig,
MarianConfig,
MBartConfig,
Expand Down Expand Up @@ -201,6 +203,7 @@

TOKENIZER_MAPPING = OrderedDict(
[
(LukeConfig, (LukeTokenizer, None)),
(RetriBertConfig, (RetriBertTokenizer, RetriBertTokenizerFast)),
(T5Config, (T5Tokenizer, T5TokenizerFast)),
(MT5Config, (MT5Tokenizer, MT5TokenizerFast)),
Expand Down
3 changes: 1 addition & 2 deletions src/transformers/models/luke/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
_import_structure["modeling_luke"] = [
"LUKE_PRETRAINED_MODEL_ARCHIVE_LIST",
"LukeModel",
"LukeEntityAwareAttentionModel",
"LukeForEntityClassification",
"LukeForEntityPairClassification",
"LukeForEntitySpanClassification",
Expand All @@ -41,7 +40,7 @@
from .configuration_luke import LUKE_PRETRAINED_CONFIG_ARCHIVE_MAP, LukeConfig

if is_torch_available():
from .modeling_luke import LUKE_PRETRAINED_MODEL_ARCHIVE_LIST, LukeEntityAwareAttentionModel, LukeModel
from .modeling_luke import LUKE_PRETRAINED_MODEL_ARCHIVE_LIST, LukeModel

else:
import importlib
Expand Down
12 changes: 10 additions & 2 deletions src/transformers/models/luke/configuration_luke.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,17 @@ class LukeConfig(RobertaConfig):
"""
model_type = "luke"

def __init__(self, entity_vocab_size: int = 500000, entity_emb_size: int = 256, **kwargs):
def __init__(
    self,
    vocab_size: int = 50267,
    entity_vocab_size: int = 500000,
    entity_emb_size: int = 256,
    use_entity_aware_attention: bool = False,
    **kwargs
):
    """Constructs LukeConfig.

    Args:
        vocab_size: Size of the word-piece vocabulary; forwarded explicitly
            to the parent config so this LUKE default overrides the parent's.
        entity_vocab_size: Number of entries in the entity vocabulary
            (size of the entity embedding table).
        entity_emb_size: Dimensionality of the entity embeddings.
        use_entity_aware_attention: Whether the model should use the
            entity-aware self-attention variant. Defaults to ``False``;
            the checkpoint-conversion script in this same change passes
            ``True`` when converting original LUKE weights.
        **kwargs: Remaining keyword arguments, forwarded unchanged to the
            parent ``RobertaConfig`` constructor.
    """
    # vocab_size is passed through by keyword so that LUKE's default
    # (50267) takes precedence over the inherited RobertaConfig default.
    super(LukeConfig, self).__init__(vocab_size=vocab_size, **kwargs)

    self.entity_vocab_size = entity_vocab_size
    self.entity_emb_size = entity_emb_size
    self.use_entity_aware_attention = use_entity_aware_attention
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

import torch

from transformers import LukeConfig, LukeEntityAwareAttentionModel, LukeTokenizer, RobertaTokenizer
from transformers import LukeConfig, LukeModel, LukeTokenizer, RobertaTokenizer
from transformers.tokenization_utils_base import AddedToken


Expand All @@ -29,7 +29,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
# Load configuration defined in the metadata file
with open(metadata_path) as metadata_file:
metadata = json.load(metadata_file)
config = LukeConfig(**metadata["model_config"])
config = LukeConfig(use_entity_aware_attention=True, **metadata["model_config"])

# Load in the weights from the checkpoint_path
state_dict = torch.load(checkpoint_path, map_location="cpu")
Expand Down Expand Up @@ -70,7 +70,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
entity_emb = state_dict["entity_embeddings.entity_embeddings.weight"]
entity_emb[entity_vocab["[MASK2]"]] = entity_emb[entity_vocab["[MASK]"]]

model = LukeEntityAwareAttentionModel(config=config).eval()
model = LukeModel(config=config).eval()

missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
assert len(missing_keys) == 1 and missing_keys[0] == "embeddings.position_ids"
Expand All @@ -81,9 +81,7 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p

text = "Top seed Ana Ivanovic said on Thursday she could hardly believe her luck as a fortuitous netcord helped the new world number one avoid a humiliating second- round exit at Wimbledon ."
span = (39, 42)
encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True)
for key, value in encoding.items():
encoding[key] = torch.as_tensor(encoding[key]).unsqueeze(0)
encoding = tokenizer(text, entity_spans=[span], add_prefix_space=True, return_tensors="pt")

outputs = model(**encoding)

Expand All @@ -97,8 +95,8 @@ def convert_luke_checkpoint(checkpoint_path, metadata_path, entity_vocab_path, p
expected_shape = torch.Size((1, 42, 768))
expected_slice = torch.tensor([[0.0037, 0.1368, -0.0091], [0.1099, 0.3329, -0.1095], [0.0765, 0.5335, 0.1179]])

assert outputs.last_hidden_state.shape == expected_shape
assert torch.allclose(outputs.last_hidden_state[0, :3, :3], expected_slice, atol=1e-4)
assert outputs.word_last_hidden_state.shape == expected_shape
assert torch.allclose(outputs.word_last_hidden_state[0, :3, :3], expected_slice, atol=1e-4)

# Verify entity hidden states
if model_size == "large":
Expand Down
Loading