From 3bb00026063d09e3d833102bfb8a35eb2055bfa3 Mon Sep 17 00:00:00 2001
From: fayejf <36722593+fayejf@users.noreply.github.com>
Date: Fri, 17 Mar 2023 06:32:44 -0700
Subject: [PATCH] small bugfix and add asr evaluator to doc (#6229)

* small fix eval_metrics

Signed-off-by: fayejf <fayejf07@gmail.com>

* fix rng

Signed-off-by: fayejf <fayejf07@gmail.com>

* add asr evaluator to doc

Signed-off-by: fayejf <fayejf07@gmail.com>

---------

Signed-off-by: fayejf <fayejf07@gmail.com>
---
 docs/source/tools/asr_evaluator.rst                 | 6 ++++++
 docs/source/tools/intro.rst                         | 1 +
 nemo/collections/asr/parts/preprocessing/perturb.py | 4 ++--
 tools/asr_evaluator/utils.py                        | 2 +-
 4 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/tools/asr_evaluator.rst

diff --git a/docs/source/tools/asr_evaluator.rst b/docs/source/tools/asr_evaluator.rst
new file mode 100644
index 000000000000..f40c171681d9
--- /dev/null
+++ b/docs/source/tools/asr_evaluator.rst
@@ -0,0 +1,6 @@
+ASR Evaluator
+========================
+
+The ASR Evaluator is a tool for thoroughly evaluating the performance of ASR models and other features, such as Voice Activity Detection (VAD).
+
+For more details, see: https://github.com/NVIDIA/NeMo/tree/stable/tools/asr_evaluator
\ No newline at end of file
diff --git a/docs/source/tools/intro.rst b/docs/source/tools/intro.rst
index 37956e351b2a..962b0a2f716b 100644
--- a/docs/source/tools/intro.rst
+++ b/docs/source/tools/intro.rst
@@ -10,6 +10,7 @@ NeMo provides a set of tools useful for developing Automatic Speech Recognitions
    ctc_segmentation
    speech_data_explorer
    comparison_tool
+   asr_evaluator
 
 
 There are also additional NeMo-related tools hosted in separate github repositories:
diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py
index d1c92dcf5e7c..801305d90b7a 100644
--- a/nemo/collections/asr/parts/preprocessing/perturb.py
+++ b/nemo/collections/asr/parts/preprocessing/perturb.py
@@ -362,8 +362,8 @@ def __init__(
             self._audiodataset = AugmentationDataset(manifest_path, audio_tar_filepaths, shuffle_n)
             self._data_iterator = iter(self._audiodataset)
 
-        self.rng = rng
-        random.seed(self.rng) if rng else None
+        self._rng = rng
+        random.seed(self._rng) if rng else None
 
     def perturb(self, data):
         impulse = read_one_audiosegment(
diff --git a/tools/asr_evaluator/utils.py b/tools/asr_evaluator/utils.py
index d2704c8a4e61..ad69b249f5db 100644
--- a/tools/asr_evaluator/utils.py
+++ b/tools/asr_evaluator/utils.py
@@ -407,7 +407,7 @@ def cal_target_metadata_wer(manifest: str, target: str, meta_cfg: DictConfig, ev
                     raise ValueError("Current only support target metadata belongs to numeric or string ")
 
         for slot_key in slot_wer:
-            slot_wer[slot_key]['wer'] = slot_wer[slot_key]['errors'] / slot_wer[slot_key]['tokens']
+            slot_wer[slot_key][eval_metric] = slot_wer[slot_key]['errors'] / slot_wer[slot_key]['tokens']
             slot_wer[slot_key]['ins_rate'] = slot_wer[slot_key]['inss'] / slot_wer[slot_key]['tokens']
             slot_wer[slot_key]['del_rate'] = slot_wer[slot_key]['dels'] / slot_wer[slot_key]['tokens']
             slot_wer[slot_key]['sub_rate'] = slot_wer[slot_key]['subs'] / slot_wer[slot_key]['tokens']