
Commit

black format
VarunGumma committed Jun 10, 2024
1 parent 561ab6a commit 9c25756
Showing 142 changed files with 897 additions and 732 deletions.
6 changes: 3 additions & 3 deletions docs/examples/MMPT/mmpt/models/mmfusionnlg.py
@@ -640,9 +640,9 @@ def generate(
             )
 
             # expand encoder_outputs
-            encoder_outputs[
-                "last_hidden_state"
-            ] = encoder_outputs.last_hidden_state.index_select(0, expanded_batch_idxs)
+            encoder_outputs["last_hidden_state"] = (
+                encoder_outputs.last_hidden_state.index_select(0, expanded_batch_idxs)
+            )
 
             # save encoder_outputs in `model_kwargs`
             model_kwargs["encoder_outputs"] = encoder_outputs

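The hunk above is the most common rewrite in this commit for subscripted assignment targets: instead of splitting the target across lines, the reformatted code keeps `encoder_outputs["last_hidden_state"]` on one line and wraps the right-hand side in parentheses. A minimal sketch of the same shape with hypothetical names (`cache`, `compute_value`, `hidden_states`); the split only applies when the joined statement would exceed the line-length limit:

# Before reformatting: the subscript target is broken across lines
cache[
    "last_hidden_state"
] = compute_value(hidden_states, expanded_batch_idxs)

# After reformatting: the target stays intact and the value is parenthesized
cache["last_hidden_state"] = (
    compute_value(hidden_states, expanded_batch_idxs)
)
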
1 change: 0 additions & 1 deletion docs/examples/MMPT/mmpt/modules/vectorpool.py
@@ -33,7 +33,6 @@ def build_retriver(
         db_type="flatl2",
         examples_per_cent_to_train=48,
     ):
-
         """merge results from multiple gpus and return a retriver.."""
         self.retriver = retriever_cls(
             hidden_size, centroids, db_type, examples_per_cent_to_train

18 changes: 10 additions & 8 deletions docs/examples/MMPT/mmpt/processors/how2processor.py
@@ -635,14 +635,16 @@ def get_special_tokens_mask(
             )
         return list(
             map(
-                lambda x: 1
-                if x
-                in [
-                    self.tokenizer.sep_token_id,
-                    self.tokenizer.cls_token_id,
-                    self.tokenizer.pad_token_id,
-                ]
-                else 0,
+                lambda x: (
+                    1
+                    if x
+                    in [
+                        self.tokenizer.sep_token_id,
+                        self.tokenizer.cls_token_id,
+                        self.tokenizer.pad_token_id,
+                    ]
+                    else 0
+                ),
                 token_ids_0,
             )
         )

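Most hunks in this commit are this second pattern: a multi-line conditional (ternary) expression used as a lambda body, call argument, or dict value is wrapped in its own parentheses, so its `if`/`else` clauses are indented one level deeper than the surrounding arguments instead of hanging at the same level. A condensed sketch of the shape of the change with hypothetical names (`flags`, `special_token_ids`, `token_ids`); in the real code the expressions are long enough that they stay multi-line:

# Before reformatting: the if/else hangs at the same level as the other arguments
flags = map(
    lambda x: 1
    if x in special_token_ids
    else 0,
    token_ids,
)

# After reformatting: the lambda body is parenthesized and indented as one unit
flags = map(
    lambda x: (
        1
        if x in special_token_ids
        else 0
    ),
    token_ids,
)
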
6 changes: 3 additions & 3 deletions docs/examples/MMPT/scripts/video_feature_extractor/extract.py
@@ -152,9 +152,9 @@
                 )  # (51, 487), (51, 512)
                 if args.l2_normalize:
                     batch_features = F.normalize(batch_features, dim=1)
-                features[
-                    i * args.batch_size : (i + 1) * args.batch_size
-                ] = batch_features
+                features[i * args.batch_size : (i + 1) * args.batch_size] = (
+                    batch_features
+                )
             features = features.cpu().numpy()
             if args.half_precision:
                 if args.type == "vae":

8 changes: 5 additions & 3 deletions docs/examples/data2vec/models/data2vec_text.py
@@ -483,9 +483,11 @@ def forward(
 
         result = {
            "losses": {
-                "main": loss.sum() / math.sqrt(sz)
-                if self.loss_scale <= 0
-                else loss.sum() * self.loss_scale,
+                "main": (
+                    loss.sum() / math.sqrt(sz)
+                    if self.loss_scale <= 0
+                    else loss.sum() * self.loss_scale
+                ),
            },
            "sample_size": loss.numel(),
        }

@@ -173,9 +173,11 @@ def __init__(self, args, task):
             dropout=args.dropout,
             attention_dropout=args.attention_dropout,
             activation_dropout=args.activation_dropout,
-            max_seq_len=task.max_positions()
-            if task.max_positions()
-            else args.tokens_per_sample,
+            max_seq_len=(
+                task.max_positions()
+                if task.max_positions()
+                else args.tokens_per_sample
+            ),
             num_segments=2,
             offset_positions_by_padding=False,
             encoder_normalize_before=args.encoder_normalize_before,

6 changes: 3 additions & 3 deletions docs/examples/fast_noisy_channel/noisy_channel_translation.py
@@ -88,9 +88,9 @@ def build_generator(
 
         for model in channel_models:
             model.make_generation_fast_(
-                beamable_mm_beam_size=None
-                if args.no_beamable_mm
-                else args.beam,
+                beamable_mm_beam_size=(
+                    None if args.no_beamable_mm else args.beam
+                ),
                 need_attn=args.print_alignment,
             )
         if self.args.fp16:

14 changes: 8 additions & 6 deletions docs/examples/laser/laser_src/laser_lstm.py
@@ -315,9 +315,9 @@ def combine_bidir(outs):
         return {
             "sentemb": sentemb,
             "encoder_out": (x, final_hiddens, final_cells),
-            "encoder_padding_mask": encoder_padding_mask
-            if encoder_padding_mask.any()
-            else None,
+            "encoder_padding_mask": (
+                encoder_padding_mask if encoder_padding_mask.any() else None
+            ),
         }
 
     def reorder_encoder_out(self, encoder_out_dict, new_order):
@@ -372,9 +372,11 @@ def __init__(
         self.layers = nn.ModuleList(
             [
                 LSTMCell(
-                    input_size=encoder_output_units + embed_dim + lang_embed_dim
-                    if layer == 0
-                    else hidden_size,
+                    input_size=(
+                        encoder_output_units + embed_dim + lang_embed_dim
+                        if layer == 0
+                        else hidden_size
+                    ),
                     hidden_size=hidden_size,
                 )
                 for layer in range(num_layers)

@@ -34,9 +34,11 @@ def sample(self, logit_idx):
         """
         assert logit_idx is not None
         self.samples = self._gumbel_sigmoid(
-            self.layer_logits[logit_idx, :].detach()
-            if self.detach_grad
-            else self.layer_logits[logit_idx, :],
+            (
+                self.layer_logits[logit_idx, :].detach()
+                if self.detach_grad
+                else self.layer_logits[logit_idx, :]
+            ),
             dim=-1,
             tau=self.tau,
             hard=self.hard_select,

4 changes: 2 additions & 2 deletions docs/examples/m2m_100/process_data/clean_histogram.py
@@ -26,10 +26,10 @@ def read_hist(f):
     return ch
 
 
-with (open("{}/{}".format(args.histograms, args.src), "r", encoding="utf8")) as f:
+with open("{}/{}".format(args.histograms, args.src), "r", encoding="utf8") as f:
     ch1 = read_hist(f)
 
-with (open("{}/{}".format(args.histograms, args.tgt), "r", encoding="utf8")) as f:
+with open("{}/{}".format(args.histograms, args.tgt), "r", encoding="utf8") as f:
     ch2 = read_hist(f)
 
 print("Accepted characters for {}: {}".format(args.src, ch1))

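This file shows a different, smaller cleanup: the parentheses wrapping the single `open(...)` context manager in each `with` statement are redundant, and the reformatted code drops them. A minimal sketch (the path "histograms/en.txt" is hypothetical):

# Before: redundant parentheses around a single context manager
with (open("histograms/en.txt", "r", encoding="utf8")) as f:
    hist = f.read()

# After: the with statement binds the open() call directly
with open("histograms/en.txt", "r", encoding="utf8") as f:
    hist = f.read()
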
@@ -132,7 +132,13 @@ def remove_messed_up_sentences(
     corrected_src = f"{to_folder}/{split}.{direction}.{src_lang}"
     line_num = 0
     keep_num = 0
-    with open(src, encoding="utf8",) as fsrc, open(tgt, encoding="utf8",) as ftgt, open(
+    with open(
+        src,
+        encoding="utf8",
+    ) as fsrc, open(
+        tgt,
+        encoding="utf8",
+    ) as ftgt, open(
         corrected_src, "w", encoding="utf8"
     ) as fsrc_corrected, open(corrected_tgt, "w", encoding="utf8") as ftgt_corrected:
         for s, t in zip(fsrc, ftgt):

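The explosion of `open(...)` onto one argument per line here is black's magic trailing comma at work: the original calls were written as `open(src, encoding="utf8",)` with a trailing comma already inside the parentheses, and black treats a pre-existing trailing comma as a request to keep the call expanded, one element per line. A minimal sketch (`src_path` is a hypothetical name):

# Before: the call already carries a trailing comma inside the parentheses
with open(src_path, encoding="utf8",) as fsrc:
    lines = fsrc.readlines()

# After: the magic trailing comma makes black expand the call, one argument per line
with open(
    src_path,
    encoding="utf8",
) as fsrc:
    lines = fsrc.readlines()
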
@@ -51,17 +51,23 @@ def main():
 
     with contextlib.ExitStack() as stack:
         inputs = [
-            stack.enter_context(
-                open(input, "r", encoding="utf-8", newline="\n", errors="replace")
-            )
-            if input != "-"
-            else io.TextIOWrapper(sys.stdin.buffer, encoding="utf-8", errors="replace")
+            (
+                stack.enter_context(
+                    open(input, "r", encoding="utf-8", newline="\n", errors="replace")
+                )
+                if input != "-"
+                else io.TextIOWrapper(
+                    sys.stdin.buffer, encoding="utf-8", errors="replace"
+                )
+            )
             for input in args.inputs
         ]
         outputs = [
-            stack.enter_context(open(output, "w", encoding="utf-8", newline="\n"))
-            if output != "-"
-            else sys.stdout
+            (
+                stack.enter_context(open(output, "w", encoding="utf-8", newline="\n"))
+                if output != "-"
+                else sys.stdout
+            )
             for output in args.outputs
         ]
         with Pool(args.num_workers, initializer=partial(init, args.model)) as p:

1 change: 0 additions & 1 deletion docs/examples/noisychannel/rerank_utils.py
@@ -134,7 +134,6 @@ def write_reprocessed(
     target_prefix_frac=None,
     source_prefix_frac=None,
 ):
-
     """writes nbest hypothesis for rescoring"""
     assert not (
         prefix_len is not None and target_prefix_frac is not None

@@ -404,6 +404,7 @@ class Embedding(nn.Embedding):
     output. The gradient for this vector from :class:`~torch.nn.Embedding`
     is always zero.
     """
+
     __constants__ = ["unk_idx"]
 
     # Torchscript: Inheriting from Embedding class produces an error when exporting to Torchscript

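A few hunks in this commit only adjust blank lines around docstrings: the stray blank line between a `def ...:` signature and the function docstring is deleted (as in vectorpool.py and rerank_utils.py above), and here a blank line is added between the class docstring and the first class-level attribute. A condensed sketch (the `Retriever` toy class is made up for illustration):

# Before reformatting
class Retriever:
    """Toy class, used only to illustrate the blank-line changes."""
    db_type = "flatl2"

    def build(self, hidden_size):

        """build and return the index key."""
        return (self.db_type, hidden_size)


# After reformatting: no blank line before the function docstring,
# one blank line after the class docstring
class Retriever:
    """Toy class, used only to illustrate the blank-line changes."""

    db_type = "flatl2"

    def build(self, hidden_size):
        """build and return the index key."""
        return (self.db_type, hidden_size)
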
16 changes: 10 additions & 6 deletions docs/examples/roberta/multiprocessing_bpe_encoder.py
@@ -58,15 +58,19 @@ def main():
 
     with contextlib.ExitStack() as stack:
         inputs = [
-            stack.enter_context(open(input, "r", encoding="utf-8"))
-            if input != "-"
-            else sys.stdin
+            (
+                stack.enter_context(open(input, "r", encoding="utf-8"))
+                if input != "-"
+                else sys.stdin
+            )
             for input in args.inputs
         ]
         outputs = [
-            stack.enter_context(open(output, "w", encoding="utf-8"))
-            if output != "-"
-            else sys.stdout
+            (
+                stack.enter_context(open(output, "w", encoding="utf-8"))
+                if output != "-"
+                else sys.stdout
+            )
             for output in args.outputs
         ]
 

@@ -142,9 +142,11 @@ def forward(self, src_tokens, src_lengths):
             "encoder_out": [output],
             # This is because that in the original implementation
             # the output didn't consider the last segment as right context.
-            "encoder_padding_mask": [encoder_padding_masks[0][:, : output.size(0)]]
-            if len(encoder_padding_masks) > 0
-            else [],
+            "encoder_padding_mask": (
+                [encoder_padding_masks[0][:, : output.size(0)]]
+                if len(encoder_padding_masks) > 0
+                else []
+            ),
             "encoder_embedding": [],
             "encoder_states": [],
             "src_tokens": [],

@@ -211,9 +211,9 @@ def extract_features(
                 encoder_out=encoder_outs,
                 encoder_padding_mask=encoder_padding_mask,
                 incremental_state=incremental_state,
-                self_attn_mask=self.buffered_future_mask(x)
-                if incremental_state is None
-                else None,
+                self_attn_mask=(
+                    self.buffered_future_mask(x) if incremental_state is None else None
+                ),
             )
 
             inner_states.append(x)

6 changes: 3 additions & 3 deletions docs/examples/speech_recognition/models/vggtransformer.py
@@ -382,9 +382,9 @@ def forward(self, src_tokens, src_lengths, **kwargs):
 
         return {
             "encoder_out": x,  # (T, B, C)
-            "encoder_padding_mask": encoder_padding_mask.t()
-            if encoder_padding_mask is not None
-            else None,
+            "encoder_padding_mask": (
+                encoder_padding_mask.t() if encoder_padding_mask is not None else None
+            ),
             # (B, T) --> (T, B)
         }
 

@@ -121,7 +121,6 @@ def __init__(
             cur_channels = out_channels // 2
 
     def forward(self, src_tokens, src_lengths, **kwargs):
-
         """
         src_tokens: padded tensor (B, T, C * feat)
         src_lengths: tensor of original lengths of input utterances (B,)

@@ -155,26 +155,32 @@ def _get_mode(logging_outputs):
         if c_total > 0:
             metrics.log_derived(
                 "uer",
-                lambda meters: safe_round(
-                    meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3
-                )
-                if meters["_c_total"].sum > 0
-                else float("nan"),
+                lambda meters: (
+                    safe_round(
+                        meters["_c_errors"].sum * 100.0 / meters["_c_total"].sum, 3
+                    )
+                    if meters["_c_total"].sum > 0
+                    else float("nan")
+                ),
             )
         if w_total > 0:
             metrics.log_derived(
                 "wer",
-                lambda meters: safe_round(
-                    meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3
-                )
-                if meters["_w_total"].sum > 0
-                else float("nan"),
+                lambda meters: (
+                    safe_round(
+                        meters["_w_errors"].sum * 100.0 / meters["_w_total"].sum, 3
+                    )
+                    if meters["_w_total"].sum > 0
+                    else float("nan")
+                ),
             )
         metrics.log_derived(
             "raw_wer",
-            lambda meters: safe_round(
-                meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3
-            )
-            if meters["_w_total"].sum > 0
-            else float("nan"),
+            lambda meters: (
+                safe_round(
+                    meters["_wv_errors"].sum * 100.0 / meters["_w_total"].sum, 3
+                )
+                if meters["_w_total"].sum > 0
+                else float("nan")
+            ),
         )

@@ -252,21 +252,27 @@ def aggregate_logging_outputs(logging_outputs):
 
         agg_output = {
             "loss": loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0,
-            "nll_loss": nll_loss_sum / sample_size / math.log(2)
-            if sample_size > 0
-            else 0.0,
+            "nll_loss": (
+                nll_loss_sum / sample_size / math.log(2) if sample_size > 0 else 0.0
+            ),
             # if args.sentence_avg, then sample_size is nsentences, and loss
             # is per-sentence loss; else sample_size is ntokens, and the loss
             # becomes per-output token loss
-            "speech_loss": speech_loss_sum / sample_size_speech / math.log(2)
-            if sample_size_speech > 0
-            else 0.0,
-            "speech_nll_loss": speech_nll_loss_sum / sample_size_speech / math.log(2)
-            if sample_size_speech > 0
-            else 0.0,
-            "speech_attn_loss": speech_attn_loss_sum / src_token_sum / math.log(2)
-            if src_token_sum > 0
-            else 0.0,
+            "speech_loss": (
+                speech_loss_sum / sample_size_speech / math.log(2)
+                if sample_size_speech > 0
+                else 0.0
+            ),
+            "speech_nll_loss": (
+                speech_nll_loss_sum / sample_size_speech / math.log(2)
+                if sample_size_speech > 0
+                else 0.0
+            ),
+            "speech_attn_loss": (
+                speech_attn_loss_sum / src_token_sum / math.log(2)
+                if src_token_sum > 0
+                else 0.0
+            ),
             "ntokens": ntokens,
             "nsentences": nsentences,
             "nframes": nframes,

@@ -233,9 +233,9 @@ def add_whole_word_mask(self, source, p):
         source_length = source.size(0)
         assert source_length - 1 not in indices
         to_keep = torch.ones(source_length, dtype=torch.bool)
-        is_word_start[
-            -1
-        ] = 255  # acts as a long length, so spans don't go over the end of doc
+        is_word_start[-1] = (
+            255  # acts as a long length, so spans don't go over the end of doc
+        )
         if self.replace_length == 0:
             to_keep[indices] = 0
         else:

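One side effect of the right-hand-side wrapping, visible in the hunk above: when the assigned value carries a trailing comment and the whole line is too long, the value is parenthesized and the comment moves inside the parentheses, onto its own indented line. A minimal sketch (`limits` is a hypothetical name standing in for `is_word_start`):

# Before: the subscript is split, so the comment stays on the closing line
limits[
    -1
] = 255  # acts as a long length, so spans don't go over the end of doc

# After: the comment rides inside the new parentheses with the value
limits[-1] = (
    255  # acts as a long length, so spans don't go over the end of doc
)
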