Skip to content

Commit a69e359

Browse files
anteju, nithinraok, ericharper, and titu1994
authored and committed
[ASR] Add pretrained ASR models for Croatian (#4682)
* [ASR] Add pretrained ASR models for Croatian
  Signed-off-by: Ante Jukić <[email protected]>
* Fix style for import
  Signed-off-by: Ante Jukić <[email protected]>

Signed-off-by: Ante Jukić <[email protected]>
Co-authored-by: Ante Jukić <[email protected]>
Co-authored-by: Nithin Rao <[email protected]>
Co-authored-by: Eric Harper <[email protected]>
Co-authored-by: Somshubra Majumdar <[email protected]>
1 parent 3ccac78 commit a69e359

File tree

6 files changed

+33
-36
lines changed

6 files changed

+33
-36
lines changed

docs/source/asr/data/benchmark_hr.csv

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Model,Model Base Class,Model Card
2+
stt_hr_conformer_ctc_large,EncDecCTCModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_ctc_large"
3+
stt_hr_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_transducer_large"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Model Name,Language,ParlaSpeech-HR v1.0 (dev),ParlaSpeech-HR v1.0 (test)
2+
stt_hr_conformer_ctc_large,hr,4.43,4.70
3+
stt_hr_conformer_transducer_large,hr,4.56,4.69

docs/source/asr/scores.rst

+10
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,16 @@ FR
169169

170170
--------------------
171171

172+
HR
173+
^^
174+
175+
.. csv-table::
176+
:header-rows: 1
177+
:align: left
178+
:file: data/scores/hr/conformer_hr.csv
179+
180+
--------------------
181+
172182
IT
173183
^^
174184

nemo/collections/asr/models/ctc_bpe_models.py

+3-11
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,6 @@ def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLo
186186
'shuffle': False,
187187
'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)),
188188
'pin_memory': True,
189-
'channel_selector': config.get('channel_selector', None),
190189
'use_start_end_token': self.cfg.validation_ds.get('use_start_end_token', False),
191190
}
192191

@@ -536,16 +535,9 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]:
536535
results.append(model)
537536

538537
model = PretrainedModelInfo(
539-
pretrained_model_name="stt_enes_conformer_ctc_large_codesw",
540-
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_ctc_large_codesw",
541-
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_enes_conformer_ctc_large_codesw/versions/1.0.0/files/stt_enes_conformer_ctc_large_codesw.nemo",
542-
)
543-
results.append(model)
544-
545-
model = PretrainedModelInfo(
546-
pretrained_model_name="stt_be_conformer_ctc_large",
547-
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_be_conformer_ctc_large",
548-
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_be_conformer_ctc_large/versions/1.12.0/files/stt_be_conformer_ctc_large.nemo",
538+
pretrained_model_name="stt_hr_conformer_ctc_large",
539+
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_ctc_large",
540+
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_hr_conformer_ctc_large/versions/1.11.0/files/stt_hr_conformer_ctc_large.nemo",
549541
)
550542
results.append(model)
551543

nemo/collections/asr/models/rnnt_bpe_models.py

+3-18
Original file line numberDiff line numberDiff line change
@@ -198,23 +198,9 @@ def list_available_models(cls) -> List[PretrainedModelInfo]:
198198
results.append(model)
199199

200200
model = PretrainedModelInfo(
201-
pretrained_model_name="stt_enes_conformer_transducer_large_codesw",
202-
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_enes_conformer_transducer_large_codesw",
203-
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_enes_conformer_transducer_large_codesw/versions/1.0.0/files/stt_enes_conformer_transducer_large_codesw.nemo",
204-
)
205-
results.append(model)
206-
207-
model = PretrainedModelInfo(
208-
pretrained_model_name="stt_kab_conformer_transducer_large",
209-
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_kab_conformer_transducer_large",
210-
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_kab_conformer_transducer_large/versions/1.12.0/files/stt_kab_conformer_transducer_large.nemo",
211-
)
212-
results.append(model)
213-
214-
model = PretrainedModelInfo(
215-
pretrained_model_name="stt_be_conformer_transducer_large",
216-
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_be_conformer_transducer_large",
217-
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_be_conformer_transducer_large/versions/1.12.0/files/stt_be_conformer_transducer_large.nemo",
201+
pretrained_model_name="stt_hr_conformer_transducer_large",
202+
description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_hr_conformer_transducer_large",
203+
location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_hr_conformer_transducer_large/versions/1.11.0/files/stt_hr_conformer_transducer_large.nemo",
218204
)
219205
results.append(model)
220206

@@ -508,7 +494,6 @@ def _setup_transcribe_dataloader(self, config: Dict) -> 'torch.utils.data.DataLo
508494
'shuffle': False,
509495
'num_workers': config.get('num_workers', min(batch_size, os.cpu_count() - 1)),
510496
'pin_memory': True,
511-
'channel_selector': config.get('channel_selector', None),
512497
'use_start_end_token': self.cfg.validation_ds.get('use_start_end_token', False),
513498
}
514499

scripts/checkpoint_averaging/checkpoint_averaging.py

+11-7
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import sys
3636

3737
import torch
38+
from tqdm.auto import tqdm
3839

3940
from nemo.core import ModelPT
4041
from nemo.utils import logging, model_utils
@@ -44,13 +45,14 @@ def main():
4445
parser = argparse.ArgumentParser()
4546
parser.add_argument(
4647
'model_fname_list',
47-
metavar='N',
48+
metavar='NEMO_FILE_OR_FOLDER',
4849
type=str,
4950
nargs='+',
5051
help='Input .nemo files (or folders who contains them) to parse',
5152
)
5253
parser.add_argument(
5354
'--import_fname_list',
55+
metavar='FILE',
5456
type=str,
5557
nargs='+',
5658
default=[],
@@ -59,7 +61,7 @@ def main():
5961
args = parser.parse_args()
6062

6163
logging.info(
62-
f"\n\nIMPORTANT: Use --import_fname_list for all files that contain missing classes (AttributeError: Can't get attribute '???' on <module '__main__' from '???'>)\n\n"
64+
f"\n\nIMPORTANT:\nIf you get the following error:\n\t(AttributeError: Can't get attribute '???' on <module '__main__' from '???'>)\nuse:\n\t--import_fname_list\nfor all files that contain missing classes.\n\n"
6365
)
6466

6567
for fn in args.import_fname_list:
@@ -77,7 +79,7 @@ def main():
7779
filter(lambda fn: not fn.endswith("-averaged.nemo"), glob.glob(os.path.join(model_fname, "*.nemo")))
7880
)
7981
if len(nemo_files) != 1:
80-
raise RuntimeError(f"Expected only a single .nemo files but discovered {len(nemo_files)} .nemo files")
82+
raise RuntimeError(f"Expected exactly one .nemo file but discovered {len(nemo_files)} .nemo files")
8183

8284
model_fname = nemo_files[0]
8385

@@ -107,23 +109,25 @@ def main():
107109

108110
logging.info(f"Averaging {n} checkpoints ...")
109111

110-
for ix, path in enumerate(checkpoint_paths):
112+
for ix, path in enumerate(tqdm(checkpoint_paths, total=n, desc='Averaging checkpoints')):
111113
checkpoint = torch.load(path, map_location=device)
112114

113115
if 'state_dict' in checkpoint:
114116
checkpoint = checkpoint['state_dict']
117+
else:
118+
raise RuntimeError(f"Checkpoint from {path} does not include a state_dict.")
115119

116120
if ix == 0:
117121
# Initial state
118122
avg_state = checkpoint
119123

120-
logging.info(f"Initialized average state dict with checkpoint : {path}")
124+
logging.info(f"Initialized average state dict with checkpoint:\n\t{path}")
121125
else:
122126
# Accumulated state
123127
for k in avg_state:
124128
avg_state[k] = avg_state[k] + checkpoint[k]
125129

126-
logging.info(f"Updated average state dict with state from checkpoint : {path}")
130+
logging.info(f"Updated average state dict with state from checkpoint:\n\t{path}")
127131

128132
for k in avg_state:
129133
if str(avg_state[k].dtype).startswith("torch.int"):
@@ -136,7 +140,7 @@ def main():
136140
# restore merged weights into model
137141
nemo_model.load_state_dict(avg_state, strict=True)
138142
# Save model
139-
logging.info(f"Saving average mdel to: {avg_model_fname}")
143+
logging.info(f"Saving average model to:\n\t{avg_model_fname}")
140144
nemo_model.save_to(avg_model_fname)
141145

142146

0 commit comments

Comments
 (0)