Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/en/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ Flax), PyTorch, and/or TensorFlow.
| VideoMAE | ❌ | ❌ | ✅ | ❌ | ❌ |
| ViLT | ❌ | ❌ | ✅ | ❌ | ❌ |
| Vision Encoder decoder | ❌ | ❌ | ✅ | ✅ | ✅ |
| VisionTextDualEncoder | ❌ | ❌ | ✅ | | ✅ |
| VisionTextDualEncoder | ❌ | ❌ | ✅ | | ✅ |
| VisualBERT | ❌ | ❌ | ✅ | ❌ | ❌ |
| ViT | ❌ | ❌ | ✅ | ✅ | ✅ |
| ViT Hybrid | ❌ | ❌ | ✅ | ❌ | ❌ |
Expand Down
5 changes: 5 additions & 0 deletions docs/source/en/model_doc/vision-text-dual-encoder.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,8 @@ new zero-shot vision tasks such as image classification or retrieval.

[[autodoc]] FlaxVisionTextDualEncoderModel
- __call__

## TFVisionTextDualEncoderModel

[[autodoc]] TFVisionTextDualEncoderModel
- call
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3275,6 +3275,7 @@
]
)
_import_structure["models.vision_encoder_decoder"].extend(["TFVisionEncoderDecoderModel"])
_import_structure["models.vision_text_dual_encoder"].extend(["TFVisionTextDualEncoderModel"])
_import_structure["models.vit"].extend(
[
"TFViTForImageClassification",
Expand Down Expand Up @@ -6335,6 +6336,7 @@
TFTransfoXLPreTrainedModel,
)
from .models.vision_encoder_decoder import TFVisionEncoderDecoderModel
from .models.vision_text_dual_encoder import TFVisionTextDualEncoderModel
from .models.vit import TFViTForImageClassification, TFViTModel, TFViTPreTrainedModel
from .models.vit_mae import TFViTMAEForPreTraining, TFViTMAEModel, TFViTMAEPreTrainedModel
from .models.wav2vec2 import (
Expand Down
2 changes: 0 additions & 2 deletions src/transformers/modeling_tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,8 +892,6 @@ def load_tf_weights(model, resolved_archive_file, ignore_mismatched_sizes=False,


def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
missing_layers = []
unexpected_layers = []
mismatched_layers = []

# Read the H5 file
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
("t5", "TFT5Model"),
("tapas", "TFTapasModel"),
("transfo-xl", "TFTransfoXLModel"),
("vision-text-dual-encoder", "TFVisionTextDualEncoderModel"),
("vit", "TFViTModel"),
("vit_mae", "TFViTMAEModel"),
("wav2vec2", "TFWav2Vec2Model"),
Expand Down
2 changes: 2 additions & 0 deletions src/transformers/models/clip/modeling_tf_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,8 @@ class TFCLIPPreTrainedModel(TFPreTrainedModel):

config_class = CLIPConfig
base_model_prefix = "clip"
_keys_to_ignore_on_load_missing = [r"position_ids"]
_keys_to_ignore_on_load_unexpected = [r"position_ids"]


CLIP_START_DOCSTRING = r"""
Expand Down
28 changes: 25 additions & 3 deletions src/transformers/models/vision_text_dual_encoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_flax_available, is_torch_available
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,
is_tf_available,
is_torch_available,
)


_import_structure = {
Expand All @@ -39,10 +45,18 @@
else:
_import_structure["modeling_flax_vision_text_dual_encoder"] = ["FlaxVisionTextDualEncoderModel"]

try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_tf_vision_text_dual_encoder"] = ["TFVisionTextDualEncoderModel"]


if TYPE_CHECKING:
from .configuration_vision_text_dual_encoder import VisionTextDualEncoderConfig
from .processing_visiotn_text_dual_encoder import VisionTextDualEncoderProcessor
from .processing_vision_text_dual_encoder import VisionTextDualEncoderProcessor

try:
if not is_torch_available():
Expand All @@ -58,7 +72,15 @@
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_vision_text_dual_encoder import FlaxVisionTextDualEncoderModel
from .modeling_flax_vision_text_dual_encoder import FlaxVisionTextDualEncoderModel

try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_tf_vision_text_dual_encoder import TFVisionTextDualEncoderModel


else:
Expand Down
Loading