Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
98 commits
Select commit Hold shift + click to select a range
9b442bd
First draft
NielsRogge Aug 2, 2021
8c3d079
Style and remove mlm
NielsRogge Sep 6, 2021
f95c3ff
Make forward pass work
NielsRogge Sep 6, 2021
4b65769
More improvements
NielsRogge Sep 6, 2021
03a1dd6
More improvements
NielsRogge Sep 7, 2021
52fd289
Fix bug
NielsRogge Sep 7, 2021
eac1a79
More improvements
NielsRogge Sep 7, 2021
d7992ca
More improvements
NielsRogge Sep 7, 2021
da5aca3
Add PerceiverTokenizer first draft
NielsRogge Sep 8, 2021
86a3dc7
Improve conversion script
NielsRogge Sep 8, 2021
774995d
More improvements
NielsRogge Sep 8, 2021
e7bb47c
Make conversion script work for the encoder
NielsRogge Sep 8, 2021
e1f4974
Make conversion script work with local pickle files
NielsRogge Sep 8, 2021
2da1d9c
Style & quality, fix-copies
NielsRogge Sep 8, 2021
62addfe
Add dummy input to conversion script
NielsRogge Sep 8, 2021
c7e1889
Add absolute position embeddings to TextPreProcessor
NielsRogge Sep 8, 2021
88e2e63
Make forward pass of encoder work
NielsRogge Sep 9, 2021
f2e783c
More improvements
NielsRogge Sep 10, 2021
53814f4
Move text preprocessor to separate script
NielsRogge Sep 10, 2021
b197342
More improvements
NielsRogge Sep 10, 2021
b72b69e
More improvements
NielsRogge Sep 10, 2021
19424fa
Add post processor
NielsRogge Sep 10, 2021
67a1b46
Make MLM model work
NielsRogge Sep 10, 2021
c245a1f
Style
NielsRogge Sep 10, 2021
26e0b3e
Add PerceiverForMaskedLM
NielsRogge Sep 10, 2021
2bb76a5
Add PerceiverImagePreprocessor
NielsRogge Sep 13, 2021
c80adfe
Make style
NielsRogge Sep 13, 2021
b264012
Make PerceiverForImageClassification work
NielsRogge Sep 13, 2021
a23a247
More improvements
NielsRogge Sep 14, 2021
c0e4c97
More improvements
NielsRogge Sep 14, 2021
fd5c778
Use tokenizer in conversion script
NielsRogge Sep 14, 2021
52bd744
Use PerceiverForMaskedLM in conversion script
NielsRogge Sep 14, 2021
f591c32
Define custom PerceiverModelOutput
NielsRogge Sep 14, 2021
bbdbfff
Improve PerceiverAttention to make it work for both MLM and image cla…
NielsRogge Sep 14, 2021
08876d7
More improvements
NielsRogge Sep 14, 2021
4a794b6
More improvements
NielsRogge Sep 15, 2021
52c4401
More improvements to the conversion script
NielsRogge Sep 15, 2021
6dca6bb
Make conversion script work for both MLM and image classification
NielsRogge Sep 15, 2021
e6699d6
Add PerceiverFeatureExtractor
NielsRogge Sep 15, 2021
78d4a57
More improvements
NielsRogge Sep 15, 2021
cd033d6
Style and quality
NielsRogge Sep 15, 2021
ef190f9
Add center cropping
NielsRogge Sep 15, 2021
7b07335
Fix bug
NielsRogge Sep 15, 2021
55aeffb
Small fix
NielsRogge Sep 15, 2021
e14b46e
Add print statement
NielsRogge Sep 15, 2021
d8289b6
Fix bug in image preprocessor
NielsRogge Sep 15, 2021
b7cfa89
Fix bug with conversion script
NielsRogge Sep 15, 2021
e4bce74
Make output position embeddings an nn.Parameter layer instead of nn.E…
NielsRogge Sep 15, 2021
95b1557
Comment out print statements
NielsRogge Sep 16, 2021
69cd3a8
Add position encoding classes
NielsRogge Sep 16, 2021
2a220b4
More improvements
NielsRogge Sep 16, 2021
8392cdd
Use position_encoding_kwargs
NielsRogge Sep 17, 2021
da34e96
Add PerceiverForImageClassificationFourier
NielsRogge Sep 17, 2021
6ed4938
Make style & quality
NielsRogge Sep 17, 2021
d7a29f6
Add PerceiverForImageClassificationConvProcessing
NielsRogge Sep 17, 2021
e7ea9f3
Style & quality
NielsRogge Sep 17, 2021
926a1e6
Add flow model
NielsRogge Sep 18, 2021
bd00190
Move processors to modeling file
NielsRogge Sep 20, 2021
9b056aa
Make position encodings modular
NielsRogge Sep 20, 2021
3137356
Make basic decoder use modular position encodings
NielsRogge Sep 20, 2021
bf9e354
Add PerceiverForOpticalFlow to conversion script
NielsRogge Sep 20, 2021
26267db
Add AudioPreprocessor
NielsRogge Sep 21, 2021
435cfff
Make it possible for the basic decoder to use Fourier position embedd…
NielsRogge Sep 21, 2021
bfe4ea7
Add PerceiverForMultimodalAutoencoding
NielsRogge Sep 21, 2021
3e757ca
Improve model for optical flow
NielsRogge Sep 22, 2021
7ceda57
Improve _build_network_inputs method
NielsRogge Sep 22, 2021
04df090
Add print statement
NielsRogge Sep 22, 2021
0fa4e63
Fix device issue
NielsRogge Sep 22, 2021
98eaa27
Fix device of Fourier embeddings
NielsRogge Sep 23, 2021
0c4547e
Add print statements for debugging
NielsRogge Sep 23, 2021
6b62a78
Add another print statement
NielsRogge Sep 23, 2021
312fce8
Add another print statement
NielsRogge Sep 23, 2021
38f5505
Add another print statement
NielsRogge Sep 23, 2021
6d87180
Add another print statement
NielsRogge Sep 23, 2021
286022a
Improve PerceiverAudioPreprocessor
NielsRogge Sep 24, 2021
321236c
Improve conversion script for multimodal modal
NielsRogge Sep 24, 2021
42cf915
More improvements
NielsRogge Sep 24, 2021
5524c9c
More improvements
NielsRogge Sep 25, 2021
4a46f07
Improve multimodal model
NielsRogge Sep 27, 2021
d940e49
Make forward pass multimodal model work
NielsRogge Sep 28, 2021
40120f2
More improvements
NielsRogge Sep 29, 2021
782fb47
Added output_size calculation for preprocessors (except multimodal)
esceptico Oct 4, 2021
30452c6
Improve tests
NielsRogge Oct 6, 2021
57c754f
Fix some more tests
NielsRogge Oct 6, 2021
166853e
Merge branch 'modeling_perceiver' into multimodal-shapes
esceptico Oct 6, 2021
367e213
Add output dataclasses
NielsRogge Oct 6, 2021
af19bf9
Renamed output_size to num_channels. Added num_channels support to Pe…
esceptico Oct 7, 2021
232a5c6
Make more tests pass
NielsRogge Oct 7, 2021
9dfda14
Add print statements for debuggin
NielsRogge Oct 7, 2021
70151d8
Add tests for image classification
NielsRogge Oct 7, 2021
90fdb9a
Add PerceiverClassifierOutput
NielsRogge Oct 7, 2021
0c09784
More improvements
NielsRogge Oct 7, 2021
a5330fb
Make more tests pass for the optical flow model
NielsRogge Oct 7, 2021
3eb2e43
Make style & quality
NielsRogge Oct 7, 2021
e921530
Merge branch 'modeling_perceiver' into multimodal-shapes
esceptico Oct 7, 2021
686e0be
Merge branch 'modeling_perceiver' into multimodal-shapes
esceptico Oct 7, 2021
74ee525
Merge remote-tracking branch 'origin/multimodal-shapes' into multimod…
esceptico Oct 7, 2021
07003be
Fixed `is_temporal` in PerceiverFlowPreprocessor
esceptico Oct 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ Flax), PyTorch, and/or TensorFlow.
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Pegasus | ✅ | ✅ | ✅ | ✅ | ✅ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Perceiver | ✅ | ❌ | ✅ | ❌ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| ProphetNet | ✅ | ❌ | ✅ | ❌ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| RAG | ✅ | ❌ | ✅ | ✅ | ❌ |
Expand Down
59 changes: 59 additions & 0 deletions docs/source/model_doc/perceiver.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
..
    Copyright 2021 The HuggingFace Team. All rights reserved.

    Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
    an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
    specific language governing permissions and limitations under the License.

Perceiver
-----------------------------------------------------------------------------------------------------------------------

Overview
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

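The Perceiver IO model was proposed in `Perceiver IO: A General Architecture for Structured Inputs & Outputs
<https://arxiv.org/abs/2107.14795>`__ by Andrew Jaegle et al. Perceiver IO extends the original Perceiver so that, in
addition to accepting arbitrary input modalities, it can produce arbitrary structured outputs by querying its latent
representation with task-specific output queries.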

The abstract from the paper is the following:

*<INSERT PAPER ABSTRACT HERE>*

Tips:

<INSERT TIPS ABOUT MODEL HERE>

This model was contributed by `nielsr <https://huggingface.co/nielsr>`__. The original code can be found `here
<https://github.com/deepmind/deepmind-research/tree/master/perceiver>`__.

PerceiverConfig
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.PerceiverConfig
    :members:


PerceiverTokenizer
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.PerceiverTokenizer
    :members: build_inputs_with_special_tokens, get_special_tokens_mask,
        create_token_type_ids_from_sequences, save_vocabulary


PerceiverModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.PerceiverModel
    :members: forward


PerceiverForMaskedLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: transformers.PerceiverForMaskedLM
    :members: forward
46 changes: 46 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@
"models.mt5": ["MT5Config"],
"models.openai": ["OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP", "OpenAIGPTConfig", "OpenAIGPTTokenizer"],
"models.pegasus": ["PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP", "PegasusConfig", "PegasusTokenizer"],
"models.perceiver": ["PERCEIVER_PRETRAINED_CONFIG_ARCHIVE_MAP", "PerceiverConfig", "PerceiverTokenizer"],
"models.phobert": ["PhobertTokenizer"],
"models.prophetnet": ["PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP", "ProphetNetConfig", "ProphetNetTokenizer"],
"models.rag": ["RagConfig", "RagRetriever", "RagTokenizer"],
Expand Down Expand Up @@ -455,6 +456,7 @@
_import_structure["models.detr"].append("DetrFeatureExtractor")
_import_structure["models.layoutlmv2"].append("LayoutLMv2FeatureExtractor")
_import_structure["models.layoutlmv2"].append("LayoutLMv2Processor")
_import_structure["models.perceiver"].append("PerceiverFeatureExtractor")
_import_structure["models.vit"].append("ViTFeatureExtractor")
else:
from .utils import dummy_vision_objects
Expand Down Expand Up @@ -525,6 +527,7 @@
_import_structure["modeling_utils"] = ["Conv1D", "PreTrainedModel", "apply_chunking_to_forward", "prune_layer"]

# PyTorch models structure

_import_structure["models.albert"].extend(
[
"ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST",
Expand Down Expand Up @@ -1032,6 +1035,27 @@
_import_structure["models.pegasus"].extend(
["PegasusForCausalLM", "PegasusForConditionalGeneration", "PegasusModel", "PegasusPreTrainedModel"]
)
# Names added to the "models.perceiver" entry of the PyTorch models structure.
# Entries are kept in sorted order, matching the corresponding
# `from .models.perceiver import (...)` list further down in this file, so the
# two lists can be compared line by line.
_import_structure["models.perceiver"].extend(
    [
        "PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST",
        "PerceiverAudioPreprocessor",
        "PerceiverBasicDecoder",
        "PerceiverClassificationDecoder",
        "PerceiverForImageClassification",
        "PerceiverForImageClassificationConvProcessing",
        "PerceiverForImageClassificationFourier",
        "PerceiverForMaskedLM",
        "PerceiverForMultimodalAutoencoding",
        "PerceiverForOpticalFlow",
        "PerceiverImagePreprocessor",
        "PerceiverLayer",
        "PerceiverModel",
        "PerceiverOneHotPreprocessor",
        "PerceiverPreTrainedModel",
        "PerceiverTextPostprocessor",
        "PerceiverTextPreprocessor",
    ]
)
_import_structure["models.prophetnet"].extend(
[
"PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST",
Expand Down Expand Up @@ -2061,6 +2085,7 @@
from .models.mt5 import MT5Config
from .models.openai import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OpenAIGPTConfig, OpenAIGPTTokenizer
from .models.pegasus import PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP, PegasusConfig, PegasusTokenizer
from .models.perceiver import PERCEIVER_PRETRAINED_CONFIG_ARCHIVE_MAP, PerceiverConfig, PerceiverTokenizer
from .models.phobert import PhobertTokenizer
from .models.prophetnet import PROPHETNET_PRETRAINED_CONFIG_ARCHIVE_MAP, ProphetNetConfig, ProphetNetTokenizer
from .models.rag import RagConfig, RagRetriever, RagTokenizer
Expand Down Expand Up @@ -2246,6 +2271,7 @@
from .models.deit import DeiTFeatureExtractor
from .models.detr import DetrFeatureExtractor
from .models.layoutlmv2 import LayoutLMv2FeatureExtractor, LayoutLMv2Processor
from .models.perceiver import PerceiverFeatureExtractor
from .models.vit import ViTFeatureExtractor
else:
from .utils.dummy_vision_objects import *
Expand All @@ -2263,6 +2289,7 @@
from .utils.dummy_timm_objects import *

if is_torch_available():

# Benchmarks
from .benchmark.benchmark import PyTorchBenchmark
from .benchmark.benchmark_args import PyTorchBenchmarkArguments
Expand Down Expand Up @@ -2731,6 +2758,25 @@
PegasusModel,
PegasusPreTrainedModel,
)
from .models.perceiver import (
PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST,
PerceiverAudioPreprocessor,
PerceiverBasicDecoder,
PerceiverClassificationDecoder,
PerceiverForImageClassification,
PerceiverForImageClassificationConvProcessing,
PerceiverForImageClassificationFourier,
PerceiverForMaskedLM,
PerceiverForMultimodalAutoencoding,
PerceiverForOpticalFlow,
PerceiverImagePreprocessor,
PerceiverLayer,
PerceiverModel,
PerceiverOneHotPreprocessor,
PerceiverPreTrainedModel,
PerceiverTextPostprocessor,
PerceiverTextPreprocessor,
)
from .models.prophetnet import (
PROPHETNET_PRETRAINED_MODEL_ARCHIVE_LIST,
ProphetNetDecoder,
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
mt5,
openai,
pegasus,
perceiver,
phobert,
prophetnet,
rag,
Expand Down
3 changes: 3 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
[
# Add configs here
("fnet", "FNetConfig"),
("perceiver", "PerceiverConfig"),
("gptj", "GPTJConfig"),
("layoutlmv2", "LayoutLMv2Config"),
("beit", "BeitConfig"),
Expand Down Expand Up @@ -102,6 +103,7 @@
# Add archive maps here
("fnet", "FNET_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("pegasus", "PEGASUS_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("perceiver", "PERCEIVER_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("gptj", "GPTJ_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("layoutlmv2", "LAYOUTLMV2_PRETRAINED_CONFIG_ARCHIVE_MAP"),
("beit", "BEIT_PRETRAINED_CONFIG_ARCHIVE_MAP"),
Expand Down Expand Up @@ -167,6 +169,7 @@
[
# Add full (and cased) model names here
("fnet", "FNet"),
("perceiver", "Perceiver"),
("gptj", "GPT-J"),
("beit", "BeiT"),
("rembert", "RemBERT"),
Expand Down
3 changes: 3 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
[
# Base model mapping
("fnet", "FNetModel"),
("perceiver", "PerceiverModel"),
("gptj", "GPTJModel"),
("layoutlmv2", "LayoutLMv2Model"),
("beit", "BeitModel"),
Expand Down Expand Up @@ -225,12 +226,14 @@
("vit", "ViTForImageClassification"),
("deit", ("DeiTForImageClassification", "DeiTForImageClassificationWithTeacher")),
("beit", "BeitForImageClassification"),
("perceiver", "PerceiverForImageClassification"),
]
)

MODEL_FOR_MASKED_LM_MAPPING_NAMES = OrderedDict(
[
# Model for Masked LM mapping
("perceiver", "PerceiverForMaskedLM"),
("fnet", "FNetForMaskedLM"),
("rembert", "RemBertForMaskedLM"),
("roformer", "RoFormerForMaskedLM"),
Expand Down
84 changes: 84 additions & 0 deletions src/transformers/models/perceiver/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# flake8: noqa
# There's no way to ignore "F401 '...' imported but unused" warnings in this
# module, but to preserve other warnings. So, don't check this module at all.

# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...file_utils import _LazyModule, is_tokenizers_available, is_torch_available, is_vision_available


# Maps each submodule of this package to the public names it provides. The
# mapping is handed to _LazyModule at the bottom of the file. The configuration
# and tokenizer entries are registered unconditionally.
_import_structure = {
    "configuration_perceiver": ["PERCEIVER_PRETRAINED_CONFIG_ARCHIVE_MAP", "PerceiverConfig"],
    "tokenization_perceiver": ["PerceiverTokenizer"],
}

# The feature extractor is only registered when is_vision_available() is true.
if is_vision_available():
    _import_structure["feature_extraction_perceiver"] = ["PerceiverFeatureExtractor"]

# The modeling classes are only registered when is_torch_available() is true.
if is_torch_available():
    _import_structure["modeling_perceiver"] = [
        "PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST",
        "PerceiverAudioPreprocessor",
        "PerceiverBasicDecoder",
        "PerceiverClassificationDecoder",
        "PerceiverForImageClassification",
        "PerceiverForImageClassificationConvProcessing",
        "PerceiverForImageClassificationFourier",
        "PerceiverForMaskedLM",
        "PerceiverForMultimodalAutoencoding",
        "PerceiverForOpticalFlow",
        "PerceiverImagePreprocessor",
        "PerceiverLayer",
        "PerceiverModel",
        "PerceiverOneHotPreprocessor",
        "PerceiverPreTrainedModel",
        "PerceiverTextPostprocessor",
        "PerceiverTextPreprocessor",
    ]


if TYPE_CHECKING:
    # Under TYPE_CHECKING the names are imported directly, mirroring the
    # entries (and the availability guards) of _import_structure above.
    from .configuration_perceiver import PERCEIVER_PRETRAINED_CONFIG_ARCHIVE_MAP, PerceiverConfig
    from .tokenization_perceiver import PerceiverTokenizer

    if is_vision_available():
        from .feature_extraction_perceiver import PerceiverFeatureExtractor

    if is_torch_available():
        from .modeling_perceiver import (
            PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST,
            PerceiverAudioPreprocessor,
            PerceiverBasicDecoder,
            PerceiverClassificationDecoder,
            PerceiverForImageClassification,
            PerceiverForImageClassificationConvProcessing,
            PerceiverForImageClassificationFourier,
            PerceiverForMaskedLM,
            PerceiverForMultimodalAutoencoding,
            PerceiverForOpticalFlow,
            PerceiverImagePreprocessor,
            PerceiverLayer,
            PerceiverModel,
            PerceiverOneHotPreprocessor,
            PerceiverPreTrainedModel,
            PerceiverTextPostprocessor,
            PerceiverTextPreprocessor,
        )

else:
    import sys

    # Replace this module's entry in sys.modules with a _LazyModule built from
    # _import_structure.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure)
Loading