From 3bb00026063d09e3d833102bfb8a35eb2055bfa3 Mon Sep 17 00:00:00 2001 From: fayejf <36722593+fayejf@users.noreply.github.com> Date: Fri, 17 Mar 2023 06:32:44 -0700 Subject: [PATCH] small bugfix and add asr evaluator to doc (#6229) * small fix eval_metrics Signed-off-by: fayejf * fix rng Signed-off-by: fayejf * add asr evaluator to doc Signed-off-by: fayejf --------- Signed-off-by: fayejf --- docs/source/tools/asr_evaluator.rst | 6 ++++++ docs/source/tools/intro.rst | 1 + nemo/collections/asr/parts/preprocessing/perturb.py | 4 ++-- tools/asr_evaluator/utils.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 docs/source/tools/asr_evaluator.rst diff --git a/docs/source/tools/asr_evaluator.rst b/docs/source/tools/asr_evaluator.rst new file mode 100644 index 000000000000..f40c171681d9 --- /dev/null +++ b/docs/source/tools/asr_evaluator.rst @@ -0,0 +1,6 @@ +ASR Evaluator +======================== + +ASR evaluator is a tool for thoroughly evaluating the performance of ASR models and other features such as Voice Activity Detection. + +See more details in: https://github.com/NVIDIA/NeMo/tree/stable/tools/asr_evaluator \ No newline at end of file diff --git a/docs/source/tools/intro.rst b/docs/source/tools/intro.rst index 37956e351b2a..962b0a2f716b 100644 --- a/docs/source/tools/intro.rst +++ b/docs/source/tools/intro.rst @@ -10,6 +10,7 @@ NeMo provides a set of tools useful for developing Automatic Speech Recognitions ctc_segmentation speech_data_explorer comparison_tool + asr_evaluator There are also additional NeMo-related tools hosted in separate github repositories: diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py index d1c92dcf5e7c..801305d90b7a 100644 --- a/nemo/collections/asr/parts/preprocessing/perturb.py +++ b/nemo/collections/asr/parts/preprocessing/perturb.py @@ -362,8 +362,8 @@ def __init__( self._audiodataset = AugmentationDataset(manifest_path, audio_tar_filepaths, shuffle_n) self._data_iterator = iter(self._audiodataset) - self.rng = rng - random.seed(self.rng) if rng else None + self._rng = rng + random.seed(self._rng) if rng else None def perturb(self, data): impulse = read_one_audiosegment( diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py index d2704c8a4e61..ad69b249f5db 100644 --- a/tools/asr_evaluator/utils.py +++ b/tools/asr_evaluator/utils.py @@ -407,7 +407,7 @@ def cal_target_metadata_wer(manifest: str, target: str, meta_cfg: DictConfig, ev raise ValueError("Current only support target metadata belongs to numeric or string ") for slot_key in slot_wer: - slot_wer[slot_key]['wer'] = slot_wer[slot_key]['errors'] / slot_wer[slot_key]['tokens'] + slot_wer[slot_key][eval_metric] = slot_wer[slot_key]['errors'] / slot_wer[slot_key]['tokens'] slot_wer[slot_key]['ins_rate'] = slot_wer[slot_key]['inss'] / slot_wer[slot_key]['tokens'] slot_wer[slot_key]['del_rate'] = slot_wer[slot_key]['dels'] / slot_wer[slot_key]['tokens'] slot_wer[slot_key]['sub_rate'] = slot_wer[slot_key]['subs'] / slot_wer[slot_key]['tokens']