Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 66 additions & 6 deletions src/transformers/models/altclip/configuration_altclip.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,23 +294,83 @@ class AltCLIPConfig(PretrainedConfig):
def __init__(
self, text_config=None, vision_config=None, projection_dim=768, logit_scale_init_value=2.6592, **kwargs
):
super().__init__(**kwargs)

# If the `[text|vision]_config_dict` arguments exist, we use them for backward compatibility.
# We pop out these 2 attributes before calling `super().__init__` to avoid them being saved (which causes a lot
# of confusion!).
text_config_dict = kwargs.pop("text_config_dict", None)
vision_config_dict = kwargs.pop("vision_config_dict", None)

super().__init__(**kwargs)

# Instead of simply assigning `[text|vision]_config_dict` to `[text|vision]_config`, we use the values in
# `[text|vision]_config_dict` to update the values in `[text|vision]_config`. The values should be same in most
# cases, but we don't want to break anything regarding `_config_dict` that existed before commit `8827e1b2`.
if text_config_dict is not None:
text_config = text_config_dict
if text_config is None:
text_config = {}

# This is the complete result when using `text_config_dict`.
_text_config_dict = AltCLIPTextConfig(**text_config_dict).to_dict()

# Give a warning if a value exists in both `_text_config_dict` and `text_config` but differs between them.
for key, value in _text_config_dict.items():
if key in text_config and value != text_config[key] and key not in ["transformers_version"]:
# If specified in `text_config_dict`
if key in text_config_dict:
message = (
f"`{key}` is found in both `text_config_dict` and `text_config` but with different values. "
f'The value `text_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`text_config_dict` is provided which will be used to initialize `AltCLIPTextConfig`. The "
f'value `text_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `text_config` with the ones in `_text_config_dict`.
text_config.update(_text_config_dict)

if vision_config_dict is not None:
vision_config = vision_config_dict
if vision_config is None:
vision_config = {}

# This is the complete result when using `vision_config_dict`.
_vision_config_dict = AltCLIPVisionConfig(**vision_config_dict).to_dict()
# Convert the `id2label` keys to strings instead of integers.
if "id2label" in _vision_config_dict:
_vision_config_dict["id2label"] = {
str(key): value for key, value in _vision_config_dict["id2label"].items()
}

# Give a warning if a value exists in both `_vision_config_dict` and `vision_config` but differs between them.
for key, value in _vision_config_dict.items():
if key in vision_config and value != vision_config[key] and key not in ["transformers_version"]:
# If specified in `vision_config_dict`
if key in vision_config_dict:
message = (
f"`{key}` is found in both `vision_config_dict` and `vision_config` but with different "
f'values. The value `vision_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`vision_config_dict` is provided which will be used to initialize `AltCLIPVisionConfig`. "
f'The value `vision_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `vision_config` with the ones in `_vision_config_dict`.
vision_config.update(_vision_config_dict)

if text_config is None:
text_config = {}
logger.info("text_config is None. Initializing the AltCLIPTextConfig with default values.")
logger.info("`text_config` is `None`. Initializing the `AltCLIPTextConfig` with default values.")

if vision_config is None:
vision_config = {}
logger.info("vision_config is None. initializing the AltCLIPVisionConfig with default values.")
logger.info("`vision_config` is `None`. initializing the `AltCLIPVisionConfig` with default values.")

self.text_config = AltCLIPTextConfig(**text_config)
self.vision_config = AltCLIPVisionConfig(**vision_config)
Expand Down
12 changes: 2 additions & 10 deletions src/transformers/models/blip/configuration_blip.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,21 +336,13 @@ def __init__(
):
super().__init__(**kwargs)

# If `_config_dict` exist, we use them for the backward compatibility.
text_config_dict = kwargs.pop("text_config_dict", None)
vision_config_dict = kwargs.pop("vision_config_dict", None)
if text_config_dict is not None:
text_config = text_config_dict
if vision_config_dict is not None:
vision_config = vision_config_dict

if text_config is None:
text_config = {}
logger.info("text_config is None. Initializing the BlipTextConfig with default values.")
logger.info("`text_config` is `None`. Initializing the `BlipTextConfig` with default values.")

if vision_config is None:
vision_config = {}
logger.info("vision_config is None. initializing the BlipVisionConfig with default values.")
logger.info("`vision_config` is `None`. Initializing the `BlipVisionConfig` with default values.")

self.text_config = BlipTextConfig(**text_config)
self.vision_config = BlipVisionConfig(**vision_config)
Expand Down
15 changes: 6 additions & 9 deletions src/transformers/models/bridgetower/configuration_bridgetower.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,10 @@ def __init__(
vision_config=None,
**kwargs,
):
# TODO: remove this once the Hub files are updated.
_ = kwargs.pop("text_config_dict", None)
_ = kwargs.pop("vision_config_dict", None)

super().__init__(**kwargs)
self.share_cross_modal_transformer_layers = share_cross_modal_transformer_layers
self.hidden_act = hidden_act
Expand All @@ -332,20 +336,13 @@ def __init__(
self.tie_word_embeddings = tie_word_embeddings
self.init_layernorm_from_vision_encoder = init_layernorm_from_vision_encoder

text_config_dict = kwargs.pop("text_config_dict", None)
vision_config_dict = kwargs.pop("vision_config_dict", None)
if text_config_dict is not None:
text_config = text_config_dict
if vision_config_dict is not None:
vision_config = vision_config_dict

if text_config is None:
text_config = {}
logger.info("text_config is None. Initializing the BridgeTowerTextConfig with default values.")
logger.info("`text_config` is `None`. Initializing the `BridgeTowerTextConfig` with default values.")

if vision_config is None:
vision_config = {}
logger.info("vision_config is None. Initializing the BridgeTowerVisionConfig with default values.")
logger.info("`vision_config` is `None`. Initializing the `BridgeTowerVisionConfig` with default values.")

self.text_config = BridgeTowerTextConfig(**text_config)
self.vision_config = BridgeTowerVisionConfig(**vision_config)
Expand Down
72 changes: 66 additions & 6 deletions src/transformers/models/chinese_clip/configuration_chinese_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,23 +315,83 @@ class ChineseCLIPConfig(PretrainedConfig):
def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs
):
super().__init__(**kwargs)

# If the `[text|vision]_config_dict` arguments exist, we use them for backward compatibility.
# We pop out these 2 attributes before calling `super().__init__` to avoid them being saved (which causes a lot
# of confusion!).
text_config_dict = kwargs.pop("text_config_dict", None)
vision_config_dict = kwargs.pop("vision_config_dict", None)

super().__init__(**kwargs)

# Instead of simply assigning `[text|vision]_config_dict` to `[text|vision]_config`, we use the values in
# `[text|vision]_config_dict` to update the values in `[text|vision]_config`. The values should be same in most
# cases, but we don't want to break anything regarding `_config_dict` that existed before commit `8827e1b2`.
if text_config_dict is not None:
text_config = text_config_dict
if text_config is None:
text_config = {}

# This is the complete result when using `text_config_dict`.
_text_config_dict = ChineseCLIPTextConfig(**text_config_dict).to_dict()

# Give a warning if a value exists in both `_text_config_dict` and `text_config` but differs between them.
for key, value in _text_config_dict.items():
if key in text_config and value != text_config[key] and key not in ["transformers_version"]:
# If specified in `text_config_dict`
if key in text_config_dict:
message = (
f"`{key}` is found in both `text_config_dict` and `text_config` but with different values. "
f'The value `text_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`text_config_dict` is provided which will be used to initialize `ChineseCLIPTextConfig`. "
f'The value `text_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `text_config` with the ones in `_text_config_dict`.
text_config.update(_text_config_dict)

if vision_config_dict is not None:
vision_config = vision_config_dict
if vision_config is None:
vision_config = {}

# This is the complete result when using `vision_config_dict`.
_vision_config_dict = ChineseCLIPVisionConfig(**vision_config_dict).to_dict()
# Convert the `id2label` keys to strings instead of integers.
if "id2label" in _vision_config_dict:
_vision_config_dict["id2label"] = {
str(key): value for key, value in _vision_config_dict["id2label"].items()
}

# Give a warning if a value exists in both `_vision_config_dict` and `vision_config` but differs between them.
for key, value in _vision_config_dict.items():
if key in vision_config and value != vision_config[key] and key not in ["transformers_version"]:
# If specified in `vision_config_dict`
if key in vision_config_dict:
message = (
f"`{key}` is found in both `vision_config_dict` and `vision_config` but with different "
f'values. The value `vision_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`vision_config_dict` is provided which will be used to initialize "
f'`ChineseCLIPVisionConfig`. The value `vision_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `vision_config` with the ones in `_vision_config_dict`.
vision_config.update(_vision_config_dict)

if text_config is None:
text_config = {}
logger.info("text_config is None. Initializing the ChineseCLIPTextConfig with default values.")
logger.info("`text_config` is `None`. Initializing the `ChineseCLIPTextConfig` with default values.")

if vision_config is None:
vision_config = {}
logger.info("vision_config is None. initializing the ChineseCLIPVisionConfig with default values.")
logger.info("`vision_config` is `None`. initializing the `ChineseCLIPVisionConfig` with default values.")

self.text_config = ChineseCLIPTextConfig(**text_config)
self.vision_config = ChineseCLIPVisionConfig(**vision_config)
Expand Down
72 changes: 66 additions & 6 deletions src/transformers/models/clip/configuration_clip.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,23 +297,83 @@ class CLIPConfig(PretrainedConfig):
def __init__(
self, text_config=None, vision_config=None, projection_dim=512, logit_scale_init_value=2.6592, **kwargs
):
super().__init__(**kwargs)

# If the `[text|vision]_config_dict` arguments exist, we use them for backward compatibility.
# We pop out these 2 attributes before calling `super().__init__` to avoid them being saved (which causes a lot
# of confusion!).
text_config_dict = kwargs.pop("text_config_dict", None)
vision_config_dict = kwargs.pop("vision_config_dict", None)

super().__init__(**kwargs)

# Instead of simply assigning `[text|vision]_config_dict` to `[text|vision]_config`, we use the values in
# `[text|vision]_config_dict` to update the values in `[text|vision]_config`. The values should be same in most
# cases, but we don't want to break anything regarding `_config_dict` that existed before commit `8827e1b2`.
if text_config_dict is not None:
text_config = text_config_dict
if text_config is None:
text_config = {}

# This is the complete result when using `text_config_dict`.
_text_config_dict = CLIPTextConfig(**text_config_dict).to_dict()

# Give a warning if a value exists in both `_text_config_dict` and `text_config` but differs between them.
for key, value in _text_config_dict.items():
if key in text_config and value != text_config[key] and key not in ["transformers_version"]:
# If specified in `text_config_dict`
if key in text_config_dict:
message = (
f"`{key}` is found in both `text_config_dict` and `text_config` but with different values. "
f'The value `text_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`text_config_dict` is provided which will be used to initialize `CLIPTextConfig`. The "
f'value `text_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `text_config` with the ones in `_text_config_dict`.
text_config.update(_text_config_dict)

if vision_config_dict is not None:
vision_config = vision_config_dict
if vision_config is None:
vision_config = {}

# This is the complete result when using `vision_config_dict`.
_vision_config_dict = CLIPVisionConfig(**vision_config_dict).to_dict()
# Convert the `id2label` keys to strings instead of integers.
if "id2label" in _vision_config_dict:
_vision_config_dict["id2label"] = {
str(key): value for key, value in _vision_config_dict["id2label"].items()
}

# Give a warning if a value exists in both `_vision_config_dict` and `vision_config` but differs between them.
for key, value in _vision_config_dict.items():
if key in vision_config and value != vision_config[key] and key not in ["transformers_version"]:
# If specified in `vision_config_dict`
if key in vision_config_dict:
message = (
f"`{key}` is found in both `vision_config_dict` and `vision_config` but with different "
f'values. The value `vision_config_dict["{key}"]` will be used instead.'
)
# If inferred from default argument values (just to be super careful)
else:
message = (
f"`vision_config_dict` is provided which will be used to initialize `CLIPVisionConfig`. "
f'The value `vision_config["{key}"]` will be overriden.'
)
logger.warning(message)

# Update all values in `vision_config` with the ones in `_vision_config_dict`.
vision_config.update(_vision_config_dict)

if text_config is None:
text_config = {}
logger.info("text_config is None. Initializing the CLIPTextConfig with default values.")
logger.info("`text_config` is `None`. Initializing the `CLIPTextConfig` with default values.")

if vision_config is None:
vision_config = {}
logger.info("vision_config is None. initializing the CLIPVisionConfig with default values.")
logger.info("`vision_config` is `None`. initializing the `CLIPVisionConfig` with default values.")

self.text_config = CLIPTextConfig(**text_config)
self.vision_config = CLIPVisionConfig(**vision_config)
Expand Down
Loading