From 1f14e2592d1bc628853f9578303b42a8aa4c116a Mon Sep 17 00:00:00 2001
From: Karim Foda
Date: Sun, 27 Mar 2022 17:25:47 +0100
Subject: [PATCH 1/9] Add initial docstring changes

---
 .../models/longformer/modeling_longformer.py | 51 ++++++++++++-------
 utils/documentation_tests.txt                |  1 +
 2 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py
index a2a4e94414a2..8d8ac292c8ed 100755
--- a/src/transformers/models/longformer/modeling_longformer.py
+++ b/src/transformers/models/longformer/modeling_longformer.py
@@ -1407,6 +1407,28 @@ def _set_gradient_checkpointing(self, module, value=False):
         module.gradient_checkpointing = value


+LONGFORMER_GENERATION_DOCSTRING = r"""
+    Mask filling example:
+
+    ```python
+    >>> from transformers import LongformerTokenizer, LongformerForMaskedLM
+
+    >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
+    >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
+
+    >>> TXT = "My friends are <mask> but they eat too many carbs." + " That's why I decide not to eat with them."*300
+    >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
+    >>> logits = model(input_ids).logits
+
+    >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
+    >>> probs = logits[0, masked_index].softmax(dim=0)
+    >>> values, predictions = probs.topk(5)
+
+    >>> tokenizer.decode(predictions).split()
+    ['healthy', 'skinny', 'thin', 'good', 'vegetarian']
+    ```
+"""
+
 LONGFORMER_START_DOCSTRING = r"""

     This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
@@ -1636,20 +1658,9 @@ def forward(
         >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000)  # long input document
         >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1

-        >>> attention_mask = torch.ones(
-        ...     input_ids.shape, dtype=torch.long, device=input_ids.device
-        >>> )  # initialize to local attention
-        >>> global_attention_mask = torch.zeros(
-        ...     input_ids.shape, dtype=torch.long, device=input_ids.device
-        >>> )  # initialize to global attention to be deactivated for all tokens
-        >>> global_attention_mask[
-        ...     :,
-        ...     [
-        ...         1,
-        ...         4,
-        ...         21,
-        ...     ],
-        >>> ] = 1  # Set global attention to random tokens for the sake of this example
+        >>> attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention
+        >>> global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to global attention to be deactivated for all tokens
+        >>> global_attention_mask[:,[1,4,21,],] = 1 # Set global attention to random tokens for the sake of this example
         >>> # Usually, set global attention based on the task. 
For example, >>> # classification: the token >>> # QA: question tokens @@ -1852,9 +1863,11 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output=[1,2], + expected_loss=0.08 ) def forward( self, @@ -2027,9 +2040,7 @@ def forward( >>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist()) >>> answer_tokens = all_tokens[torch.argmax(start_logits) : torch.argmax(end_logits) + 1] - >>> answer = tokenizer.decode( - ... tokenizer.convert_tokens_to_ids(answer_tokens) - >>> ) # remove space prepending space token + >>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -2118,9 +2129,11 @@ def __init__(self, config): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="brad1141/Longformer-finetuned-norm", output_type=LongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="['Lead', 'Evidence', 'Lead', 'Evidence', 'Lead']", + expected_loss=0.01, ) def forward( self, diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index b8632f29f9f2..daf30224dece 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -10,6 +10,7 @@ src/transformers/models/convnext/modeling_convnext.py src/transformers/models/data2vec/modeling_data2vec_audio.py src/transformers/models/deit/modeling_deit.py src/transformers/models/hubert/modeling_hubert.py +src/transformers/models/longformer/modeling_longformer.py src/transformers/models/marian/modeling_marian.py src/transformers/models/mbart/modeling_mbart.py src/transformers/models/pegasus/modeling_pegasus.py From 996d0329214f012230018de0556680dc0a9c5bf1 Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Sun, 27 Mar 2022 17:40:03 +0100 Subject: [PATCH 2/9] make fixup --- .../models/longformer/modeling_longformer.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 8d8ac292c8ed..b922f63f9830 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1416,7 +1416,7 @@ def _set_gradient_checkpointing(self, module, value=False): >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them."*300 + >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them." * 300 >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] >>> logits = model(input_ids).logits @@ -1658,9 +1658,20 @@ def forward( >>> SAMPLE_TEXT = " ".join(["Hello world! 
"] * 1000) # long input document >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1 - >>> attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to local attention - >>> global_attention_mask = torch.zeros(input_ids.shape, dtype=torch.long, device=input_ids.device) # initialize to global attention to be deactivated for all tokens - >>> global_attention_mask[:,[1,4,21,],] = 1 # Set global attention to random tokens for the sake of this example + >>> attention_mask = torch.ones( + ... input_ids.shape, dtype=torch.long, device=input_ids.device + >>> ) # initialize to local attention + >>> global_attention_mask = torch.zeros( + ... input_ids.shape, dtype=torch.long, device=input_ids.device + >>> ) # initialize to global attention to be deactivated for all tokens + >>> global_attention_mask[ + ... :, + ... [ + ... 1, + ... 4, + ... 21, + ... ], + >>> ] = 1 # Set global attention to random tokens for the sake of this example >>> # Usually, set global attention based on the task. For example, >>> # classification: the token >>> # QA: question tokens @@ -1866,8 +1877,8 @@ def __init__(self, config): checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output=[1,2], - expected_loss=0.08 + expected_output=[1, 2], + expected_loss=0.08, ) def forward( self, @@ -2040,7 +2051,9 @@ def forward( >>> all_tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist()) >>> answer_tokens = all_tokens[torch.argmax(start_logits) : torch.argmax(end_logits) + 1] - >>> answer = tokenizer.decode(tokenizer.convert_tokens_to_ids(answer_tokens)) # remove space prepending space token + >>> answer = tokenizer.decode( + ... 
tokenizer.convert_tokens_to_ids(answer_tokens) + >>> ) # remove space prepending space token ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict From c4b9a25f95f747e6e4b7cd29d80eec0fd4dc8fdc Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Thu, 31 Mar 2022 15:15:49 +0100 Subject: [PATCH 3/9] Add TF doc changes --- .../models/longformer/modeling_tf_longformer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 762f872ee709..a214b562dcca 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -2081,10 +2081,12 @@ def get_prefix_bias_name(self): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="saibo/legal-longformer-base-4096", output_type=TFLongformerMaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", + expected_output="' no'", + expected_loss=4.5, ) def call( self, @@ -2178,6 +2180,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint="allenai/longformer-large-4096-finetuned-triviaqa", output_type=TFLongformerQuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output="' puppet'", + expected_loss=0.96, ) def call( self, @@ -2325,6 +2329,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="LABEL_0", + expected_loss=0.58, ) def call( self, @@ -2565,6 +2571,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint=_CHECKPOINT_FOR_DOC, output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, + expected_output="['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0']", + expected_loss=0.62, ) def call( self, From 9c811f7b057bb8d377e0e269cf39f7efac5bd9af Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Tue, 3 May 2022 09:11:01 +0200 Subject: [PATCH 4/9] fix seq classifier output --- .../models/longformer/modeling_longformer.py | 52 ++++++------------- utils/documentation_tests.txt | 1 + 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 95a417a3b87e..194cfc66ca86 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1407,28 +1407,6 @@ def _set_gradient_checkpointing(self, module, value=False): module.gradient_checkpointing = value -LONGFORMER_GENERATION_DOCSTRING = r""" - Mask filling example: - - ```python - >>> from transformers import LongformerTokenizer, LongformerForMaskedLM - - >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") - >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - - >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them." 
* 300 - >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] - >>> logits = model(input_ids).logits - - >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() - >>> probs = logits[0, masked_index].softmax(dim=0) - >>> values, predictions = probs.topk(5) - - >>> tokenizer.decode(predictions).split() - ['healthy', 'skinny', 'thin', 'good', 'vegetarian'] - ``` -""" - LONGFORMER_START_DOCSTRING = r""" This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the @@ -1796,23 +1774,25 @@ def forward( Returns: - Examples: + Mask filling example: ```python - >>> import torch - >>> from transformers import LongformerForMaskedLM, LongformerTokenizer + >>> from transformers import LongformerTokenizer, LongformerForMaskedLM - >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") - - >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000) # long input document - >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0) # batch of size 1 - - >>> attention_mask = None # default is local attention everywhere, which is a good choice for MaskedLM - >>> # check `LongformerModel.forward` for more details how to set *attention_mask* - >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids) - >>> loss = outputs.loss - >>> prediction_logits = outputs.logits + >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") + + # Let's try a very long input. + >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them." * 300 + >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] + >>> logits = model(input_ids).logits + + >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() + >>> probs = logits[0, masked_index].softmax(dim=0) + >>> values, predictions = probs.topk(5) + + >>> tokenizer.decode(predictions).split() + ['healthy', 'skinny', 'thin', 'good', 'vegetarian'] ```""" return_dict = return_dict if return_dict is not None else self.config.use_return_dict @@ -1877,7 +1857,7 @@ def __init__(self, config): checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output=[1, 2], + expected_output='ORIGINAL', expected_loss=0.08, ) def forward( diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index c1059d45168f..eca783ceb348 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -19,6 +19,7 @@ src/transformers/models/deit/modeling_deit.py src/transformers/models/glpn/modeling_glpn.py src/transformers/models/hubert/modeling_hubert.py src/transformers/models/longformer/modeling_longformer.py +src/transformers/models/longformer/modeling_tf_longformer.py src/transformers/models/marian/modeling_marian.py src/transformers/models/marian/modeling_marian.py src/transformers/models/mbart/modeling_mbart.py From 34809da689aded2fb5d1649d70798694ca15bd06 Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Tue, 3 May 2022 10:25:31 +0200 Subject: [PATCH 5/9] fix quality errors --- src/transformers/models/longformer/modeling_longformer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 20868e5931c2..5b71bfe15ce2 100755 --- 
a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1777,7 +1777,7 @@ def forward( >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - + # Let's try a very long input. >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them." * 300 >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] @@ -1853,7 +1853,7 @@ def __init__(self, config): checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output='ORIGINAL', + expected_output="ORIGINAL", expected_loss=0.08, ) def forward( From 2da3790691856d2670018dc893a135ce8a614044 Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Tue, 3 May 2022 20:41:36 +0200 Subject: [PATCH 6/9] t --- src/transformers/models/longformer/modeling_longformer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 5b71bfe15ce2..241321b048a7 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1777,9 +1777,12 @@ def forward( >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - # Let's try a very long input. - >>> TXT = "My friends are but they eat too many carbs." + " That's why I decide not to eat with them." * 300 + + >>> TXT = ( + ... "My friends are but they eat too many carbs." + ... + " That's why I decide not to eat with them." * 300 + ... 
) >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"] >>> logits = model(input_ids).logits From ca8f6dcc61771ab9f86aabd9f8f9ac5e9c3ff9df Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Wed, 11 May 2022 16:39:58 -0700 Subject: [PATCH 7/9] swithc head to random init --- .../models/longformer/modeling_tf_longformer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index d6141b0e7c10..031b837c74d3 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -2079,7 +2079,7 @@ def get_prefix_bias_name(self): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint="saibo/legal-longformer-base-4096", + checkpoint="allenai/longformer-base-4096", output_type=TFLongformerMaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", @@ -2322,7 +2322,7 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, expected_output="LABEL_0", @@ -2562,7 +2562,7 @@ def __init__(self, config, *inputs, **kwargs): @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, expected_output="['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0']", From 2858d815001a33ab82a574f9952d7ec2404df983 Mon Sep 17 00:00:00 2001 From: Karim Foda Date: Fri, 13 May 2022 12:59:42 -0700 Subject: [PATCH 8/9] Fix expected outputs --- .../models/longformer/modeling_longformer.py | 11 ++++++----- .../models/longformer/modeling_tf_longformer.py | 12 ++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index bbacdee99083..109e1b24064c 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1777,7 +1777,8 @@ def forward( >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - # Let's try a very long input. + + Let's try a very long input. >>> TXT = ( ... "My friends are but they eat too many carbs." 
@@ -1856,8 +1857,8 @@ def __init__(self, config): checkpoint="jpelhaw/longformer-base-plagiarism-detection", output_type=LongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="ORIGINAL", - expected_loss=0.08, + expected_output="'ORIGINAL'", + expected_loss=5.44, ) def forward( self, @@ -2124,8 +2125,8 @@ def __init__(self, config): checkpoint="brad1141/Longformer-finetuned-norm", output_type=LongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="['Lead', 'Evidence', 'Lead', 'Evidence', 'Lead']", - expected_loss=0.01, + expected_output="['Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence']", + expected_loss=0.63, ) def forward( self, diff --git a/src/transformers/models/longformer/modeling_tf_longformer.py b/src/transformers/models/longformer/modeling_tf_longformer.py index 031b837c74d3..8f654ea84b18 100644 --- a/src/transformers/models/longformer/modeling_tf_longformer.py +++ b/src/transformers/models/longformer/modeling_tf_longformer.py @@ -2083,8 +2083,8 @@ def get_prefix_bias_name(self): output_type=TFLongformerMaskedLMOutput, config_class=_CONFIG_FOR_DOC, mask="", - expected_output="' no'", - expected_loss=4.5, + expected_output="' Paris'", + expected_loss=0.44, ) def call( self, @@ -2325,8 +2325,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerSequenceClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="LABEL_0", - expected_loss=0.58, + expected_output="'LABEL_1'", + expected_loss=0.69, ) def call( self, @@ -2565,8 +2565,8 @@ def __init__(self, config, *inputs, **kwargs): checkpoint="hf-internal-testing/tiny-random-longformer", output_type=TFLongformerTokenClassifierOutput, config_class=_CONFIG_FOR_DOC, - expected_output="['LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_1', 'LABEL_1', 'LABEL_0', 'LABEL_0', 'LABEL_0', 'LABEL_0']", - expected_loss=0.62, + expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']", + expected_loss=0.59, ) def call( self, From f5f504039c8e6000834d93b465614ee014681a04 Mon Sep 17 00:00:00 2001 From: Karim Foda <35491698+KMFODA@users.noreply.github.com> Date: Mon, 16 May 2022 17:09:09 -0700 Subject: [PATCH 9/9] Update src/transformers/models/longformer/modeling_longformer.py Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com> --- src/transformers/models/longformer/modeling_longformer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/longformer/modeling_longformer.py b/src/transformers/models/longformer/modeling_longformer.py index 109e1b24064c..c35cf318c437 100755 --- a/src/transformers/models/longformer/modeling_longformer.py +++ b/src/transformers/models/longformer/modeling_longformer.py @@ -1777,9 +1777,11 @@ def forward( >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096") >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096") - + ``` + Let's try a very long input. + ```python >>> TXT = ( ... "My friends are but they eat too many carbs." ... + " That's why I decide not to eat with them." * 300
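
As an aside for reviewers: the `expected_output` and `expected_loss` values pinned throughout this series are meant to match what the doctest examples produce when run against the named Hub checkpoints. Below is a minimal sketch of that kind of local spot check; it is not part of the patch, it assumes the `jpelhaw/longformer-base-plagiarism-detection` checkpoint referenced above is reachable, and the input sentence is an arbitrary placeholder rather than the text used by the doctest templates.

```python
# Illustrative sketch only (not part of the patch): spot-check one of the
# pinned checkpoints by hand. The input sentence is arbitrary, so the printed
# label is only an example of the kind of value pinned in expected_output.
import torch
from transformers import LongformerForSequenceClassification, LongformerTokenizer

ckpt = "jpelhaw/longformer-base-plagiarism-detection"  # checkpoint referenced in this series
tokenizer = LongformerTokenizer.from_pretrained(ckpt)
model = LongformerForSequenceClassification.from_pretrained(ckpt)

inputs = tokenizer("The quick brown fox jumps over the lazy dog.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = int(logits.argmax(dim=-1))
print(model.config.id2label[predicted_class_id])  # e.g. 'ORIGINAL' or 'PLAGIARISM' for this head
```

The same pattern, swapping in `LongformerForTokenClassification` or the corresponding TF classes, applies to the other checkpoints pinned in this series.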