Commit
Merge branch 'main' into nmt-bottleneck
michalivne authored Jul 14, 2021
2 parents 85449f3 + 6ebbcb8 commit efa5d3f
Showing 9 changed files with 549 additions and 136 deletions.
50 changes: 0 additions & 50 deletions Jenkinsfile
@@ -1001,56 +1001,6 @@ pipeline {
sh 'ls -lha examples/nlp/language_modeling'
}
}
stage('L2: Pretraining BERT pretraining from Text with char tokenizer') {
steps {
sh 'cd examples/nlp/language_modeling && \
python bert_pretraining.py \
--config-name=bert_pretraining_from_text_config.yaml \
trainer.gpus=[0] \
trainer.precision=16 \
trainer.amp_level=O1 \
+trainer.fast_dev_run=true \
model.train_ds.data_file=/home/TestData/nlp/wikitext-2/train.txt \
model.train_ds.batch_size=32 \
model.validation_ds.data_file=/home/TestData/nlp/wikitext-2/valid.txt \
model.validation_ds.batch_size=32 \
model.language_model.config_file=/home/TestData/nlp/bert_configs/bert_3200.json \
model.optim.lr=0.01 \
model.optim.sched.warmup_ratio=0.1 \
model.tokenizer.tokenizer_name=char \
model.tokenizer.vocab_file=/home/TestData/nlp/vocabs/mini_vocab.txt \
model.mask_prob=0.15 \
model.short_seq_prob=0.1 \
exp_manager.exp_dir=PretrainingBERTFromTextchartok \
'
sh 'rm -rf examples/nlp/language_modeling/PretrainingBERTFromTextchartok'
}
}
stage('L2: Pretraining BERT pretraining from Text with word tokenizer') {
steps {
sh 'cd examples/nlp/language_modeling && \
python bert_pretraining.py \
--config-name=bert_pretraining_from_text_config.yaml \
trainer.gpus=[1] \
trainer.precision=16 \
trainer.amp_level=O1 \
+trainer.fast_dev_run=true \
model.train_ds.data_file=/home/TestData/nlp/wikitext-2/train.txt \
model.train_ds.batch_size=32 \
model.validation_ds.data_file=/home/TestData/nlp/wikitext-2/valid.txt \
model.validation_ds.batch_size=32 \
model.language_model.config_file=/home/TestData/nlp/bert_configs/bert_3200.json \
model.optim.lr=0.01 \
model.optim.sched.warmup_ratio=0.1 \
model.tokenizer.tokenizer_name=word \
model.tokenizer.vocab_file=/home/TestData/nlp/vocabs/mini_vocab.txt \
model.mask_prob=0.15 \
model.short_seq_prob=0.1 \
exp_manager.exp_dir=PretrainingBERTFromTextwordtok \
'
sh 'rm -rf examples/nlp/language_modeling/PretrainingBERTFromTextwordtok'
}
}
}
}

99 changes: 99 additions & 0 deletions examples/speaker_recognition/conf/SpeakerNet_ECAPA.yaml
@@ -0,0 +1,99 @@
name: "SpeakerNet"

model:

sample_rate: 16000

train_ds:
manifest_filepath: ???
sample_rate: ${model.sample_rate}
labels: null
batch_size: 64
shuffle: True
time_length: 3
augmentor:
noise:
manifest_path: null
prob: 0.5
min_snr_db: 0
max_snr_db: 15

speed:
prob: 0.5
sr: ${model.sample_rate}
resample_type: 'kaiser_fast'
min_speed_rate: 0.95
max_speed_rate: 1.05

validation_ds:
manifest_filepath: ???
sample_rate: ${model.sample_rate}
labels: null
batch_size: 128
shuffle: False
time_length: 3

preprocessor:
_target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
normalize: "per_feature"
window_size: 0.025
sample_rate: ${model.sample_rate}
window_stride: 0.01
window: "hann"
features: 80
n_fft: 512
frame_splicing: 1
dither: 0.00001
stft_conv: false

encoder:
_target_: nemo.collections.asr.modules.ECAPAEncoder
feat_in: ${model.preprocessor.features}
filters: [1024,1024,1024,1024,3072]
kernel_sizes: [5,3,3,3,1]
dilations: [1,1,1,1,1]
scale: 8


decoder:
_target_: nemo.collections.asr.modules.SpeakerDecoder
feat_in: 3072
num_classes: 7205
pool_mode: 'attention' #xvector,tap or attention
emb_sizes: 192
angular: True

loss:
scale: 30
margin: 0.2

optim:
name: sgd
lr: 0.008
weight_decay: 0.0002

# scheduler setup
sched:
name: CosineAnnealing
warmup_ratio: 0.1
min_lr: 0.0001

trainer:
gpus: 1 # number of gpus (trained on single node - 8 gpus)
max_epochs: 200
max_steps: null # computed at runtime if not set
num_nodes: 1
accelerator: ddp
accumulate_grad_batches: 1
amp_level: O0
deterministic: False
checkpoint_callback: False
logger: False
log_every_n_steps: 1 # Interval of logging.
val_check_interval: 1.0 # Set to 0.25 to check 4 times per epoch, or an int for number of iterations

exp_manager:
exp_dir: null
name: ${name}
create_tensorboard_logger: True
create_checkpoint_callback: True
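
For reference, a minimal sketch of how a config like this could be used to train the ECAPA speaker model. The manifest paths and the direct use of EncDecSpeakerLabelModel with OmegaConf are assumptions for illustration, not part of this commit:

# Illustrative only: instantiate the ECAPA speaker model from the new config.
# The manifest paths below are hypothetical placeholders.
import pytorch_lightning as pl
from omegaconf import OmegaConf
from nemo.collections.asr.models import EncDecSpeakerLabelModel

cfg = OmegaConf.load("examples/speaker_recognition/conf/SpeakerNet_ECAPA.yaml")
cfg.model.train_ds.manifest_filepath = "train_manifest.json"      # hypothetical path
cfg.model.validation_ds.manifest_filepath = "dev_manifest.json"   # hypothetical path

trainer = pl.Trainer(**cfg.trainer)
model = EncDecSpeakerLabelModel(cfg=cfg.model, trainer=trainer)
trainer.fit(model)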
@@ -103,7 +103,7 @@ model:
feat_in: *enc_feat_out
num_classes: 7205
pool_mode: 'xvector'
emb_sizes: 512,512
emb_sizes: [512,512]
angular: False

loss:
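
For context, the bracketed form makes emb_sizes parse as a YAML list of two embedding sizes rather than a single string. A quick illustrative check of the parsing difference using OmegaConf directly:

# Hypothetical check: without brackets YAML parses the value as one string.
from omegaconf import OmegaConf

old = OmegaConf.create("emb_sizes: 512,512")
new = OmegaConf.create("emb_sizes: [512,512]")
print(type(old.emb_sizes), old.emb_sizes)   # <class 'str'> 512,512
print(type(new.emb_sizes), new.emb_sizes)   # ListConfig [512, 512]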
4 changes: 2 additions & 2 deletions nemo/collections/asr/models/label_models.py
@@ -192,8 +192,8 @@ def forward(self, input_signal, input_signal_length):
input_signal=input_signal, length=input_signal_length,
)

encoded, _ = self.encoder(audio_signal=processed_signal, length=processed_signal_len)
logits, embs = self.decoder(encoder_output=encoded)
encoded, length = self.encoder(audio_signal=processed_signal, length=processed_signal_len)
logits, embs = self.decoder(encoder_output=encoded, length=length)
return logits, embs

# PTL-specific methods
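
The forward change above threads the encoder's output lengths into the decoder so that pooling can ignore padded frames. A rough sketch of the idea (a hypothetical helper, not the actual SpeakerDecoder implementation):

# Hypothetical illustration of length-aware pooling over encoder outputs.
import torch

def masked_mean_pool(encoded: torch.Tensor, length: torch.Tensor) -> torch.Tensor:
    # encoded: [B, D, T] encoder features; length: [B] valid frame counts.
    max_t = encoded.size(-1)
    mask = torch.arange(max_t, device=encoded.device)[None, :] < length[:, None]  # [B, T]
    mask = mask.unsqueeze(1).to(encoded.dtype)  # [B, 1, T]
    summed = (encoded * mask).sum(dim=-1)       # ignore padded frames
    return summed / length.clamp(min=1).unsqueeze(-1).to(encoded.dtype)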
1 change: 1 addition & 0 deletions nemo/collections/asr/modules/__init__.py
@@ -24,6 +24,7 @@
ConvASRDecoder,
ConvASRDecoderClassification,
ConvASREncoder,
ECAPAEncoder,
SpeakerDecoder,
)
from nemo.collections.asr.modules.lstm_decoder import LSTMDecoder
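
Since ECAPAEncoder is now exported from nemo.collections.asr.modules, it can also be constructed directly. A small usage sketch, with parameters copied from the config above and dummy input shapes assumed for illustration:

# Illustrative only: build the newly exported ECAPAEncoder with the config values above.
import torch
from nemo.collections.asr.modules import ECAPAEncoder

encoder = ECAPAEncoder(
    feat_in=80,                              # matches model.preprocessor.features
    filters=[1024, 1024, 1024, 1024, 3072],
    kernel_sizes=[5, 3, 3, 3, 1],
    dilations=[1, 1, 1, 1, 1],
    scale=8,
)
feats = torch.randn(4, 80, 300)              # [batch, features, frames] (dummy input)
lengths = torch.full((4,), 300, dtype=torch.long)
encoded, encoded_len = encoder(audio_signal=feats, length=lengths)  # as called in label_models.py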