Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/en/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ Flax), PyTorch, and/or TensorFlow.
| VideoMAE | ❌ | ❌ | ✅ | ❌ | ❌ |
| ViLT | ❌ | ❌ | ✅ | ❌ | ❌ |
| Vision Encoder decoder | ❌ | ❌ | ✅ | ✅ | ✅ |
| VisionTextDualEncoder | ❌ | ❌ | ✅ | ❌ | ✅ |
| VisionTextDualEncoder | ❌ | ❌ | ✅ | ✅ | ✅ |
| VisualBERT | ❌ | ❌ | ✅ | ❌ | ❌ |
| ViT | ❌ | ❌ | ✅ | ✅ | ✅ |
| ViT Hybrid | ❌ | ❌ | ✅ | ❌ | ❌ |
Expand Down
5 changes: 5 additions & 0 deletions docs/source/en/model_doc/vision-text-dual-encoder.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,8 @@ new zero-shot vision tasks such as image classification or retrieval.

[[autodoc]] FlaxVisionTextDualEncoderModel
- __call__

## TFVisionTextDualEncoderModel

[[autodoc]] TFVisionTextDualEncoderModel
- call
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3275,6 +3275,7 @@
]
)
_import_structure["models.vision_encoder_decoder"].extend(["TFVisionEncoderDecoderModel"])
_import_structure["models.vision_text_dual_encoder"].extend(["TFVisionTextDualEncoderModel"])
_import_structure["models.vit"].extend(
[
"TFViTForImageClassification",
Expand Down Expand Up @@ -6335,6 +6336,7 @@
TFTransfoXLPreTrainedModel,
)
from .models.vision_encoder_decoder import TFVisionEncoderDecoderModel
from .models.vision_text_dual_encoder import TFVisionTextDualEncoderModel
from .models.vit import TFViTForImageClassification, TFViTModel, TFViTPreTrainedModel
from .models.vit_mae import TFViTMAEForPreTraining, TFViTMAEModel, TFViTMAEPreTrainedModel
from .models.wav2vec2 import (
Expand Down
2 changes: 0 additions & 2 deletions src/transformers/modeling_tf_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,8 +892,6 @@ def load_tf_weights(model, resolved_archive_file, ignore_mismatched_sizes=False,


def load_tf_weights_from_h5(model, resolved_archive_file, ignore_mismatched_sizes=False, _prefix=None):
missing_layers = []
unexpected_layers = []
mismatched_layers = []

# Read the H5 file
Expand Down
1 change: 1 addition & 0 deletions src/transformers/models/auto/modeling_tf_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
("t5", "TFT5Model"),
("tapas", "TFTapasModel"),
("transfo-xl", "TFTransfoXLModel"),
("vision-text-dual-encoder", "TFVisionTextDualEncoderModel"),
("vit", "TFViTModel"),
("vit_mae", "TFViTMAEModel"),
("wav2vec2", "TFWav2Vec2Model"),
Expand Down
2 changes: 2 additions & 0 deletions src/transformers/models/clip/modeling_tf_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,8 @@ class TFCLIPPreTrainedModel(TFPreTrainedModel):

config_class = CLIPConfig
base_model_prefix = "clip"
_keys_to_ignore_on_load_missing = [r"position_ids"]
_keys_to_ignore_on_load_unexpected = [r"position_ids"]


CLIP_START_DOCSTRING = r"""
Expand Down
28 changes: 25 additions & 3 deletions src/transformers/models/vision_text_dual_encoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_flax_available, is_torch_available
from ...utils import (
OptionalDependencyNotAvailable,
_LazyModule,
is_flax_available,
is_tf_available,
is_torch_available,
)


_import_structure = {
Expand All @@ -39,10 +45,18 @@
else:
_import_structure["modeling_flax_vision_text_dual_encoder"] = ["FlaxVisionTextDualEncoderModel"]

try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
_import_structure["modeling_tf_vision_text_dual_encoder"] = ["TFVisionTextDualEncoderModel"]


if TYPE_CHECKING:
from .configuration_vision_text_dual_encoder import VisionTextDualEncoderConfig
from .processing_visiotn_text_dual_encoder import VisionTextDualEncoderProcessor
from .processing_vision_text_dual_encoder import VisionTextDualEncoderProcessor

try:
if not is_torch_available():
Expand All @@ -58,7 +72,15 @@
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_vision_text_dual_encoder import FlaxVisionTextDualEncoderModel
from .modeling_flax_vision_text_dual_encoder import FlaxVisionTextDualEncoderModel

try:
if not is_tf_available():
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
pass
else:
from .modeling_tf_vision_text_dual_encoder import TFVisionTextDualEncoderModel


else:
Expand Down
Loading