
Commit

black format
VarunGumma committed Jun 10, 2024
1 parent 561ab6a commit 9c25756
Showing 142 changed files with 897 additions and 732 deletions.
6 changes: 3 additions & 3 deletions docs/examples/MMPT/mmpt/models/mmfusionnlg.py
@@ -640,9 +640,9 @@ def generate(
             )
 
             # expand encoder_outputs
-            encoder_outputs[
-                "last_hidden_state"
-            ] = encoder_outputs.last_hidden_state.index_select(0, expanded_batch_idxs)
+            encoder_outputs["last_hidden_state"] = (
+                encoder_outputs.last_hidden_state.index_select(0, expanded_batch_idxs)
+            )
 
             # save encoder_outputs in `model_kwargs`
             model_kwargs["encoder_outputs"] = encoder_outputs

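The hunk above is the most common rewrite in this commit for subscripted assignment targets: instead of splitting the target across lines, the reformatted code keeps `encoder_outputs["last_hidden_state"]` on one line and wraps the right-hand side in parentheses. A minimal sketch of the same shape with hypothetical names (`cache`, `compute_value`, `hidden_states`); the split only applies when the joined statement would exceed the line-length limit:

# Before reformatting: the subscript target is broken across lines
cache[
    "last_hidden_state"
] = compute_value(hidden_states, expanded_batch_idxs)

# After reformatting: the target stays intact and the value is parenthesized
cache["last_hidden_state"] = (
    compute_value(hidden_states, expanded_batch_idxs)
)
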
1 change: 0 additions & 1 deletion docs/examples/MMPT/mmpt/modules/vectorpool.py
@@ -33,7 +33,6 @@ def build_retriver(
         db_type="flatl2",
         examples_per_cent_to_train=48,
     ):
-
         """merge results from multiple gpus and return a retriver.."""
         self.retriver = retriever_cls(
             hidden_size, centroids, db_type, examples_per_cent_to_train

18 changes: 10 additions & 8 deletions docs/examples/MMPT/mmpt/processors/how2processor.py
@@ -635,14 +635,16 @@ def get_special_tokens_mask(
             )
         return list(
             map(
-                lambda x: 1
-                if x
-                in [
-                    self.tokenizer.sep_token_id,
-                    self.tokenizer.cls_token_id,
-                    self.tokenizer.pad_token_id,
-                ]
-                else 0,
+                lambda x: (
+                    1
+                    if x
+                    in [
+                        self.tokenizer.sep_token_id,
+                        self.tokenizer.cls_token_id,
+                        self.tokenizer.pad_token_id,
+                    ]
+                    else 0
+                ),
                 token_ids_0,
             )
         )

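Most hunks in this commit are this second pattern: a multi-line conditional (ternary) expression used as a lambda body, call argument, or dict value is wrapped in its own parentheses, so its `if`/`else` clauses are indented one level deeper than the surrounding arguments instead of hanging at the same level. A condensed sketch of the shape of the change with hypothetical names (`flags`, `special_token_ids`, `token_ids`); in the real code the expressions are long enough that they stay multi-line:

# Before reformatting: the if/else hangs at the same level as the other arguments
flags = map(
    lambda x: 1
    if x in special_token_ids
    else 0,
    token_ids,
)

# After reformatting: the lambda body is parenthesized and indented as one unit
flags = map(
    lambda x: (
        1
        if x in special_token_ids
        else 0
    ),
    token_ids,
)
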
6 changes: 3 additions & 3 deletions docs/examples/MMPT/scripts/video_feature_extractor/extract.py
@@ -152,9 +152,9 @@
                 )  # (51, 487), (51, 512)
                 if args.l2_normalize:
                     batch_features = F.normalize(batch_features, dim=1)
-                features[
-                    i * args.batch_size : (i + 1) * args.batch_size
-                ] = batch_features
+                features[i * args.batch_size : (i + 1) * args.batch_size] = (
+                    batch_features
+                )
             features = features.cpu().numpy()
             if args.half_precision:
                 if args.type == "vae":

8 changes: 5 additions & 3 deletions docs/examples/data2vec/models/data2vec_text.py
@@ -483,9 +483,11 @@ def forward(
 
         result = {
            "losses": {
-                "main": loss.sum() / math.sqrt(sz)
-                if self.loss_scale <= 0
-                else loss.sum() * self.loss_scale,
+                "main": (
+                    loss.sum() / math.sqrt(sz)
+                    if self.loss_scale <= 0
+                    else loss.sum() * self.loss_scale
+                ),
            },
            "sample_size": loss.numel(),
        }

@@ -173,9 +173,11 @@ def __init__(self, args, task):
             dropout=args.dropout,
             attention_dropout=args.attention_dropout,
             activation_dropout=args.activation_dropout,
-            max_seq_len=task.max_positions()
-            if task.max_positions()
-            else args.tokens_per_sample,
+            max_seq_len=(
+                task.max_positions()
+                if task.max_positions()
+                else args.tokens_per_sample
+            ),
             num_segments=2,
             offset_positions_by_padding=False,
             encoder_normalize_before=args.encoder_normalize_before,

6 changes: 3 additions & 3 deletions docs/examples/fast_noisy_channel/noisy_channel_translation.py
@@ -88,9 +88,9 @@ def build_generator(
 
         for model in channel_models:
             model.make_generation_fast_(
-                beamable_mm_beam_size=None
-                if args.no_beamable_mm
-                else args.beam,
+                beamable_mm_beam_size=(
+                    None if args.no_beamable_mm else args.beam
+                ),
                 need_attn=args.print_alignment,
             )
         if self.args.fp16:

14 changes: 8 additions & 6 deletions docs/examples/laser/laser_src/laser_lstm.py
@@ -315,9 +315,9 @@ def combine_bidir(outs):
         return {
             "sentemb": sentemb,
             "encoder_out": (x, final_hiddens, final_cells),
-            "encoder_padding_mask": encoder_padding_mask
-            if encoder_padding_mask.any()
-            else None,
+            "encoder_padding_mask": (
+                encoder_padding_mask if encoder_padding_mask.any() else None
+            ),
         }
 
     def reorder_encoder_out(self, encoder_out_dict, new_order):
@@ -372,9 +372,11 @@ def __init__(
         self.layers = nn.ModuleList(
             [
                 LSTMCell(
-                    input_size=encoder_output_units + embed_dim + lang_embed_dim
-                    if layer == 0
-                    else hidden_size,
+                    input_size=(
+                        encoder_output_units + embed_dim + lang_embed_dim
+                        if layer == 0
+                        else hidden_size
+                    ),
                     hidden_size=hidden_size,
                 )
                 for layer in range(num_layers)

@@ -34,9 +34,11 @@ def sample(self, logit_idx):
         """
         assert logit_idx is not None
         self.samples = self._gumbel_sigmoid(
-            self.layer_logits[logit_idx, :].detach()
-            if self.detach_grad
-            else self.layer_logits[logit_idx, :],
+            (
+                self.layer_logits[logit_idx, :].detach()
+                if self.detach_grad
+                else self.layer_logits[logit_idx, :]
+            ),
             dim=-1,
             tau=self.tau,
             hard=self.hard_select,

4 changes: 2 additions & 2 deletions docs/examples/m2m_100/process_data/clean_histogram.py
@@ -26,10 +26,10 @@ def read_hist(f):
     return ch
 
 
-with (open("{}/{}".format(args.histograms, args.src), "r", encoding="utf8")) as f:
+with open("{}/{}".format(args.histograms, args.src), "r", encoding="utf8") as f:
     ch1 = read_hist(f)
 
-with (open("{}/{}".format(args.histograms, args.tgt), "r", encoding="utf8")) as f:
+with open("{}/{}".format(args.histograms, args.tgt), "r", encoding="utf8") as f:
     ch2 = read_hist(f)
 
 print("Accepted characters for {}: {}".format(args.src, ch1))

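This file shows a different, smaller cleanup: the parentheses wrapping the single `open(...)` context manager in each `with` statement are redundant, and the reformatted code drops them. A minimal sketch (the path "histograms/en.txt" is hypothetical):

# Before: redundant parentheses around a single context manager
with (open("histograms/en.txt", "r", encoding="utf8")) as f:
    hist = f.read()

# After: the with statement binds the open() call directly
with open("histograms/en.txt", "r", encoding="utf8") as f:
    hist = f.read()
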
@@ -132,7 +132,13 @@ def remove_messed_up_sentences(
     corrected_src = f"{to_folder}/{split}.{direction}.{src_lang}"
     line_num = 0
     keep_num = 0
-    with open(src, encoding="utf8",) as fsrc, open(tgt, encoding="utf8",) as ftgt, open(
+    with open(
+        src,
+        encoding="utf8",
+    ) as fsrc, open(
+        tgt,
+        encoding="utf8",
+    ) as ftgt, open(
         corrected_src, "w", encoding="utf8"
     ) as fsrc_corrected, open(corrected_tgt, "w", encoding="utf8") as ftgt_corrected:
         for s, t in zip(fsrc, ftgt):

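The explosion of `open(...)` onto one argument per line here is black's magic trailing comma at work: the original calls were written as `open(src, encoding="utf8",)` with a trailing comma already inside the parentheses, and black treats a pre-existing trailing comma as a request to keep the call expanded, one element per line. A minimal sketch (`src_path` is a hypothetical name):

# Before: the call already carries a trailing comma inside the parentheses
with open(src_path, encoding="utf8",) as fsrc:
    lines = fsrc.readlines()

# After: the magic trailing comma makes black expand the call, one argument per line
with open(
    src_path,
    encoding="utf8",
) as fsrc:
    lines = fsrc.readlines()
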
@@ -51,17 +51,23 @@ def main():
 
     with contextlib.ExitStack() as stack:
         inputs = [
-            stack.enter_context(
-                open(input, "r", encoding="utf-8", newline="\n", errors="replace")
-            )
-            if input != "-"
-            else io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8", errors="replace")
+            (
+                stack.enter_context(
+                    open(input, "r", encoding="utf-8", newline="\n", errors="replace")
+                )
+                if input != "-"
+                else io.TextIOWrapper(
+                    sys.stdin.buffer, encoding="utf-8", errors="replace"
+                )
+            )
             for input in args.inputs
         ]
         outputs = [
-            stack.enter_context(open(output, "w", encoding="utf-8", newline="\n"))
-            if output != "-"
-            else sys.stdout
+            (
+                stack.enter_context(open(output, "w", encoding="utf-8", newline="\n"))
+                if output != "-"
+                else sys.stdout
+            )
             for output in args.outputs
         ]
         with Pool(args.num_workers, initializer=partial(init, args.model)) as p:

1 change: 0 additions & 1 deletion docs/examples/noisychannel/rerank_utils.py
@@ -134,7 +134,6 @@ def write_reprocessed(
     target_prefix_frac=None,
     source_prefix_frac=None,
 ):
-
     """writes nbest hypothesis for rescoring"""
     assert not (
         prefix_len is not None and target_prefix_frac is not None

@@ -404,6 +404,7 @@ class Embedding(nn.Embedding):
     output. The gradient for this vector from :class:`~torch.nn.Embedding`
     is always zero.
     """
+
     __constants__ = ["unk_idx"]
 
     # Torchscript: Inheriting from Embedding class produces an error when exporting to Torchscript

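A few hunks in this commit only adjust blank lines around docstrings: the stray blank line between a `def ...:` signature and the function docstring is deleted (as in vectorpool.py and rerank_utils.py above), and here a blank line is added between the class docstring and the first class-level attribute. A condensed sketch (the `Retriever` toy class is made up for illustration):

# Before reformatting
class Retriever:
    """Toy class, used only to illustrate the blank-line changes."""
    db_type = "flatl2"

    def build(self, hidden_size):

        """build and return the index key."""
        return (self.db_type, hidden_size)


# After reformatting: no blank line before the function docstring,
# one blank line after the class docstring
class Retriever:
    """Toy class, used only to illustrate the blank-line changes."""

    db_type = "flatl2"

    def build(self, hidden_size):
        """build and return the index key."""
        return (self.db_type, hidden_size)
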
16 changes: 10 additions & 6 deletions docs/examples/roberta/multiprocessing_bpe_encoder.py
@@ -58,15 +58,19 @@ def main():
 
     with contextlib.ExitStack() as stack:
         inputs = [
-            stack.enter_context(open(input, "r", encoding="utf-8"))
-            if input != "-"
-            else sys.stdin
+            (
+                stack.enter_context(open(input, "r", encoding="utf-8"))
+                if input != "-"
+                else sys.stdin
+            )
             for input in args.inputs
         ]
         outputs = [
-            stack.enter_context(open(output, "w", encoding="utf-8"))
-            if output != "-"
-            else sys.stdout
+            (
+                stack.enter_context(open(output, "w", encoding="utf-8"))
+                if output != "-"
+                else sys.stdout
+            )
             for output in args.outputs
         ]
 

@@ -142,9 +142,11 @@ def forward(self, src_tokens, src_lengths):
             "encoder_out": [output],
             # This is because that in the original implementation
             # the output didn't consider the last segment as right context.
-            "encoder_padding_mask": [encoder_padding_masks[0][:, : output.size(0)]]
-            if len(encoder_padding_masks) > 0
-            else [],
+            "encoder_padding_mask": (
+                [encoder_padding_masks[0][:, : output.size(0)]]
+                if len(encoder_padding_masks) > 0
+                else []
+            ),
             "encoder_embedding": [],
             "encoder_states": [],
             "src_tokens": [],

@@ -211,9 +211,9 @@ def extract_features(
                 encoder_out=encoder_outs,
                 encoder_padding_mask=encoder_padding_mask,
                 incremental_state=incremental_state,
-                self_attn_mask=self.buffered_future_mask(x)
-                if incremental_state is None
-                else None,
+                self_attn_mask=(
+                    self.buffered_future_mask(x) if incremental_state is None else None
+                ),
             )
 
             inner_states.append(x)

6 changes: 3 additions & 3 deletions docs/examples/speech_recognition/models/vggtransformer.py
@@ -382,9 +382,9 @@ def forward(self, src_tokens, src_lengths, **kwargs):
 
         return {
             "encoder_out": x,  # (T, B, C)
-            "encoder_padding_mask": encoder_padding_mask.t()
-            if encoder_padding_mask is not None
-            else None,
+            "encoder_padding_mask": (
+                encoder_padding_mask.t() if encoder_padding_mask is not None else None
+            ),
             # (B, T) --> (T, B)
         }
 

@@ -121,7 +121,6 @@ def __init__(
             cur_channels = out_channels // 2
 
     def forward(self, src_tokens, src_lengths, **kwargs):
-
         """
         src_tokens: padded tensor (B, T, C * feat)
         src_lengths: tensor of original lengths of input utterances (B,)

@@ -155,26 +155,32 @@ def _get_mode(logging_outputs):
         if c_total > 0:
             metrics.log_derived(
                 "uer",
-                lambda meters: safe_round(
-                    meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3
-                )
-                if meters["_c_total"].sum > 0
-                else float("nan"),
+                lambda meters: (
+                    safe_round(
+                        meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3
+                    )
+                    if meters["_c_total"].sum > 0
+                    else float("nan")
+                ),
             )
         if w_total > 0:
             metrics.log_derived(
                 "wer",
-                lambda meters: safe_round(
-                    meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3
-                )
-                if meters["_w_total"].sum > 0
-                else float("nan"),
+                lambda meters: (
+                    safe_round(
+                        meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3
+                    )
+                    if meters["_w_total"].sum > 0
+                    else float("nan")
+                ),
             )
         metrics.log_derived(
             "raw_wer",
-            lambda meters: safe_round(
-                meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3
-            )
-            if meters["_w_total"].sum > 0
-            else float("nan"),
+            lambda meters: (
+                safe_round(
+                    meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3
+                )
+                if meters["_w_total"].sum > 0
+                else float("nan")
+            ),
         )

@@ -252,21 +252,27 @@ def aggregate_logging_outputs(logging_outputs):
 
         agg_output = {
             "loss": loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0,
-            "nll_loss": nll_loss_sum / sample_size / math.log(2)
-            if sample_size > 0
-            else 0.0,
+            "nll_loss": (
+                nll_loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0
+            ),
             # if args.sentence_avg, then sample_size is nsentences, and loss
             # is per-sentence loss; else sample_size is ntokens, and the loss
             # becomes per-output token loss
-            "speech_loss": speech_loss_sum / sample_size_speech / math.log(2)
-            if sample_size_speech > 0
-            else 0.0,
-            "speech_nll_loss": speech_nll_loss_sum / sample_size_speech / math.log(2)
-            if sample_size_speech > 0
-            else 0.0,
-            "speech_attn_loss": speech_attn_loss_sum / src_token_sum / math.log(2)
-            if src_token_sum > 0
-            else 0.0,
+            "speech_loss": (
+                speech_loss_sum / sample_size_speech / math.log(2)
+                if sample_size_speech > 0
+                else 0.0
+            ),
+            "speech_nll_loss": (
+                speech_nll_loss_sum / sample_size_speech / math.log(2)
+                if sample_size_speech > 0
+                else 0.0
+            ),
+            "speech_attn_loss": (
+                speech_attn_loss_sum / src_token_sum / math.log(2)
+                if src_token_sum > 0
+                else 0.0
+            ),
             "ntokens": ntokens,
             "nsentences": nsentences,
             "nframes": nframes,

@@ -233,9 +233,9 @@ def add_whole_word_mask(self, source, p):
         source_length = source.size(0)
         assert source_length - 1 not in indices
         to_keep = torch.ones(source_length, dtype=torch.bool)
-        is_word_start[
-            -1
-        ] = 255  # acts as a long length, so spans don't go over the end of doc
+        is_word_start[-1] = (
+            255  # acts as a long length, so spans don't go over the end of doc
+        )
         if self.replace_length == 0:
             to_keep[indices] = 0
         else:

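One side effect of the right-hand-side wrapping, visible in the hunk above: when the assigned value carries a trailing comment and the whole line is too long, the value is parenthesized and the comment moves inside the parentheses, onto its own indented line. A minimal sketch (`limits` is a hypothetical name standing in for `is_word_start`):

# Before: the subscript is split, so the comment stays on the closing line
limits[
    -1
] = 255  # acts as a long length, so spans don't go over the end of doc

# After: the comment rides inside the new parentheses with the value
limits[-1] = (
    255  # acts as a long length, so spans don't go over the end of doc
)
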