diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py
index 6c61433712e2..7128f348c36e 100644
--- a/src/transformers/keras_callbacks.py
+++ b/src/transformers/keras_callbacks.py
@@ -9,7 +9,7 @@
 from packaging.version import parse
 from tensorflow.keras.callbacks import Callback
 
-from huggingface_hub import Repository
+from huggingface_hub import Repository, create_repo
 
 from . import IntervalStrategy, PreTrainedTokenizerBase
 from .modelcard import TrainingSummary
@@ -339,11 +339,13 @@ def __init__(
         self.output_dir = output_dir
         self.hub_model_id = hub_model_id
+        create_repo(self.hub_model_id, exist_ok=True)
         self.repo = Repository(
             str(self.output_dir),
             clone_from=self.hub_model_id,
             use_auth_token=hub_token if hub_token else True,
         )
+
         self.tokenizer = tokenizer
         self.last_job = None
         self.checkpoint = checkpoint
 
@@ -394,17 +396,22 @@ def on_epoch_end(self, epoch, logs=None):
             )
 
     def on_train_end(self, logs=None):
+        # Makes sure the latest version of the model is uploaded
         if self.last_job is not None and not self.last_job.is_done:
-            self.last_job._process.terminate()  # Gotta go fast
+            logging.info("Pushing the last epoch to the Hub, this may take a while...")
             while not self.last_job.is_done:
                 sleep(1)
-        self.model.save_pretrained(self.output_dir)
-        if self.tokenizer is not None:
-            self.tokenizer.save_pretrained(self.output_dir)
-        train_summary = TrainingSummary.from_keras(
-            model=self.model, model_name=self.hub_model_id, keras_history=self.training_history, **self.model_card_args
-        )
-        model_card = train_summary.to_model_card()
-        with (self.output_dir / "README.md").open("w") as f:
-            f.write(model_card)
-        self.repo.push_to_hub(commit_message="End of training", blocking=True)
+        else:
+            self.model.save_pretrained(self.output_dir)
+            if self.tokenizer is not None:
+                self.tokenizer.save_pretrained(self.output_dir)
+            train_summary = TrainingSummary.from_keras(
+                model=self.model,
+                model_name=self.hub_model_id,
+                keras_history=self.training_history,
+                **self.model_card_args,
+            )
+            model_card = train_summary.to_model_card()
+            with (self.output_dir / "README.md").open("w") as f:
+                f.write(model_card)
+            self.repo.push_to_hub(commit_message="End of training", blocking=True)
diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot.py b/src/transformers/models/blenderbot/tokenization_blenderbot.py
index ef0a24e06d9a..157d495a7af9 100644
--- a/src/transformers/models/blenderbot/tokenization_blenderbot.py
+++ b/src/transformers/models/blenderbot/tokenization_blenderbot.py
@@ -399,6 +399,7 @@ def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1:
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating
         and adding special tokens. A Blenderbot sequence has the following format:
         - single sequence: ` X </s>`
+
         Args:
             token_ids_0 (`List[int]`):
                 List of IDs to which the special tokens will be added
diff --git a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py
index 508c643f4d82..4e65294cb904 100644
--- a/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py
+++ b/src/transformers/models/blenderbot/tokenization_blenderbot_fast.py
@@ -284,6 +284,7 @@ def build_inputs_with_special_tokens(self, token_ids_0: List[int], token_ids_1:
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating
         and adding special tokens. A Blenderbot sequence has the following format:
         - single sequence: ` X </s>`
+
         Args:
             token_ids_0 (`List[int]`):
                 List of IDs to which the special tokens will be added
diff --git a/src/transformers/models/markuplm/tokenization_markuplm.py b/src/transformers/models/markuplm/tokenization_markuplm.py
index faaebf3f268b..2c9e006858ca 100644
--- a/src/transformers/models/markuplm/tokenization_markuplm.py
+++ b/src/transformers/models/markuplm/tokenization_markuplm.py
@@ -428,6 +428,7 @@ def build_inputs_with_special_tokens(
         adding special tokens. A RoBERTa sequence has the following format:
         - single sequence: `<s> X </s>`
         - pair of sequences: `<s> A </s></s> B </s>`
+
         Args:
             token_ids_0 (`List[int]`):
                 List of IDs to which the special tokens will be added.
diff --git a/src/transformers/models/markuplm/tokenization_markuplm_fast.py b/src/transformers/models/markuplm/tokenization_markuplm_fast.py
index c55cb2b3e124..1531c5ca4bce 100644
--- a/src/transformers/models/markuplm/tokenization_markuplm_fast.py
+++ b/src/transformers/models/markuplm/tokenization_markuplm_fast.py
@@ -883,6 +883,7 @@ def build_inputs_with_special_tokens(
         adding special tokens. A RoBERTa sequence has the following format:
         - single sequence: `<s> X </s>`
         - pair of sequences: `<s> A </s></s> B </s>`
+
         Args:
             token_ids_0 (`List[int]`):
                 List of IDs to which the special tokens will be added.
diff --git a/src/transformers/models/tapex/tokenization_tapex.py b/src/transformers/models/tapex/tokenization_tapex.py
index e9994ff7b5cb..f862ba7e4f76 100644
--- a/src/transformers/models/tapex/tokenization_tapex.py
+++ b/src/transformers/models/tapex/tokenization_tapex.py
@@ -342,6 +342,7 @@ def build_inputs_with_special_tokens(
         adding special tokens. A TAPEX sequence has the following format:
         - single sequence: `<s> X </s>`
         - pair of sequences: `<s> A </s></s> B </s>`
+
         Args:
             token_ids_0 (`List[int]`):
                 List of IDs to which the special tokens will be added.
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index 2f4c12257645..f6a2d74342e4 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -78,9 +78,11 @@
         TF_MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
         TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
         BertConfig,
+        PushToHubCallback,
         RagRetriever,
         TFAutoModel,
         TFAutoModelForSequenceClassification,
+        TFBertForMaskedLM,
         TFBertModel,
         TFRagModel,
         TFSharedEmbeddings,
@@ -2344,6 +2346,11 @@ def tearDownClass(cls):
         except HTTPError:
             pass
 
+        try:
+            delete_repo(token=cls._token, repo_id="test-model-tf-callback")
+        except HTTPError:
+            pass
+
         try:
             delete_repo(token=cls._token, repo_id="valid_org/test-model-tf-org")
         except HTTPError:
             pass
@@ -2363,13 +2370,14 @@ def test_push_to_hub(self):
             model.push_to_hub("test-model-tf", use_auth_token=self._token)
         logging.set_verbosity_warning()
         # Check the model card was created and uploaded.
-        self.assertIn("Uploading README.md to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)
+        self.assertIn("Uploading the following files to __DUMMY_TRANSFORMERS_USER__/test-model-tf", cl.out)
 
         new_model = TFBertModel.from_pretrained(f"{USER}/test-model-tf")
         models_equal = True
         for p1, p2 in zip(model.weights, new_model.weights):
-            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
+            if not tf.math.reduce_all(p1 == p2):
                 models_equal = False
+                break
         self.assertTrue(models_equal)
 
         # Reset repo
@@ -2382,8 +2390,32 @@ def test_push_to_hub(self):
         new_model = TFBertModel.from_pretrained(f"{USER}/test-model-tf")
         models_equal = True
         for p1, p2 in zip(model.weights, new_model.weights):
-            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
+            if not tf.math.reduce_all(p1 == p2):
+                models_equal = False
+                break
+        self.assertTrue(models_equal)
+
+    def test_push_to_hub_callback(self):
+        config = BertConfig(
+            vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
+        )
+        model = TFBertForMaskedLM(config)
+        model.compile()
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            push_to_hub_callback = PushToHubCallback(
+                output_dir=tmp_dir,
+                hub_model_id="test-model-tf-callback",
+                hub_token=self._token,
+            )
+            model.fit(model.dummy_inputs, model.dummy_inputs, epochs=1, callbacks=[push_to_hub_callback])
+
+        new_model = TFBertForMaskedLM.from_pretrained(f"{USER}/test-model-tf-callback")
+        models_equal = True
+        for p1, p2 in zip(model.weights, new_model.weights):
+            if not tf.math.reduce_all(p1 == p2):
                 models_equal = False
+                break
         self.assertTrue(models_equal)
 
     def test_push_to_hub_in_organization(self):
@@ -2399,8 +2431,9 @@
         new_model = TFBertModel.from_pretrained("valid_org/test-model-tf-org")
         models_equal = True
         for p1, p2 in zip(model.weights, new_model.weights):
-            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
+            if not tf.math.reduce_all(p1 == p2):
                 models_equal = False
+                break
         self.assertTrue(models_equal)
 
         # Reset repo
@@ -2415,6 +2448,7 @@
         new_model = TFBertModel.from_pretrained("valid_org/test-model-tf-org")
         models_equal = True
         for p1, p2 in zip(model.weights, new_model.weights):
-            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
+            if not tf.math.reduce_all(p1 == p2):
                 models_equal = False
+                break
         self.assertTrue(models_equal)