diff --git a/nemo/collections/asr/metrics/rnnt_wer.py b/nemo/collections/asr/metrics/rnnt_wer.py index fc083a2ab3f3..97c9c4575982 100644 --- a/nemo/collections/asr/metrics/rnnt_wer.py +++ b/nemo/collections/asr/metrics/rnnt_wer.py @@ -100,16 +100,16 @@ class AbstractRNNTDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -140,7 +140,7 @@ class AbstractRNNTDecoding(ConfidenceMixin): timestep during greedy decoding. Setting to larger values allows longer sentences to be decoded, at the cost of increased execution time. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. @@ -277,7 +277,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyTDTInfer( @@ -291,7 +291,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyMultiblankRNNTInfer( @@ -304,7 +304,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'greedy_batch': @@ -320,7 +320,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: self.decoding = greedy_decode.GreedyBatchedTDTInfer( @@ -334,7 +334,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) else: @@ -348,7 +348,7 @@ def __init__(self, decoding_cfg, decoder, joint, blank_id: int): ), preserve_alignments=self.preserve_alignments, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'beam': @@ -1005,16 +1005,16 @@ class RNNTDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1047,7 +1047,7 @@ class RNNTDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/rnnt_wer_bpe.py b/nemo/collections/asr/metrics/rnnt_wer_bpe.py index 40ae00b413b3..e8ea8f399b99 100644 --- a/nemo/collections/asr/metrics/rnnt_wer_bpe.py +++ b/nemo/collections/asr/metrics/rnnt_wer_bpe.py @@ -100,16 +100,16 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -142,7 +142,7 @@ class RNNTBPEDecoding(AbstractRNNTDecoding): preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py index 7802e3b8a0c9..14fa46b308ab 100644 --- a/nemo/collections/asr/metrics/wer.py +++ b/nemo/collections/asr/metrics/wer.py @@ -258,16 +258,16 @@ class AbstractCTCDecoding(ConfidenceMixin): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -300,7 +300,7 @@ class AbstractCTCDecoding(ConfidenceMixin): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. @@ -389,7 +389,7 @@ def __init__(self, decoding_cfg, blank_id: int): preserve_alignments=self.preserve_alignments, compute_timestamps=self.compute_timestamps, preserve_frame_confidence=self.preserve_frame_confidence, - confidence_measure_cfg=self.confidence_measure_cfg, + confidence_method_cfg=self.confidence_method_cfg, ) elif self.cfg.strategy == 'beam': @@ -1037,16 +1037,16 @@ class CTCDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1079,7 +1079,7 @@ class CTCDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/metrics/wer_bpe.py b/nemo/collections/asr/metrics/wer_bpe.py index 0a277e57e86a..b95bb62008ae 100644 --- a/nemo/collections/asr/metrics/wer_bpe.py +++ b/nemo/collections/asr/metrics/wer_bpe.py @@ -74,16 +74,16 @@ class CTCBPEDecoding(AbstractCTCDecoding): from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -116,7 +116,7 @@ class CTCBPEDecoding(AbstractCTCDecoding): preserve_alignments: Same as above, overrides above value. compute_timestamps: Same as above, overrides above value. preserve_frame_confidence: Same as above, overrides above value. - confidence_measure_cfg: Same as above, overrides confidence_cfg.measure_cfg. + confidence_method_cfg: Same as above, overrides confidence_cfg.method_cfg. "beam": beam_size: int, defining the beam size for beam search. Must be >= 1. diff --git a/nemo/collections/asr/models/confidence_ensemble.py b/nemo/collections/asr/models/confidence_ensemble.py index bf65ff96ef5c..dcbb0a05976c 100644 --- a/nemo/collections/asr/models/confidence_ensemble.py +++ b/nemo/collections/asr/models/confidence_ensemble.py @@ -25,7 +25,7 @@ from nemo.collections.asr.models.hybrid_rnnt_ctc_models import EncDecHybridRNNTCTCModel from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMeasureConfig, + ConfidenceMethodConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -61,7 +61,7 @@ def to_confidence_config(self) -> ConfidenceConfig: return ConfidenceConfig( exclude_blank=self.exclude_blank, aggregation=self.aggregation, - measure_cfg=ConfidenceMeasureConfig( + method_cfg=ConfidenceMethodConfig( name=name, entropy_type=entropy_type, alpha=self.alpha, entropy_norm=entropy_norm, ), ) @@ -126,7 +126,7 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) hypothesis: generated hypothesis as returned from the transcribe method of the ASR model. confidence_cfg: confidence config specifying what kind of - measure/aggregation should be used. + method/aggregation should be used. Returns: float: confidence score. @@ -135,12 +135,12 @@ def compute_confidence(hypothesis: Hypothesis, confidence_cfg: ConfidenceConfig) filtered_logprobs = get_filtered_logprobs(hypothesis, confidence_cfg.exclude_blank) vocab_size = filtered_logprobs.shape[1] aggr_func = get_confidence_aggregation_bank()[confidence_cfg.aggregation] - if confidence_cfg.measure_cfg.name == "max_prob": + if confidence_cfg.method_cfg.name == "max_prob": conf_type = "max_prob" alpha = 1.0 else: - conf_type = f"entropy_{confidence_cfg.measure_cfg.entropy_type}_{confidence_cfg.measure_cfg.entropy_norm}" - alpha = confidence_cfg.measure_cfg.alpha + conf_type = f"entropy_{confidence_cfg.method_cfg.entropy_type}_{confidence_cfg.method_cfg.entropy_norm}" + alpha = confidence_cfg.method_cfg.alpha conf_func = get_confidence_measure_bank()[conf_type] conf_value = aggr_func(conf_func(filtered_logprobs, v=vocab_size, t=alpha)).cpu().item() diff --git a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py index 1f29a511fc9c..44ae9f4a134b 100644 --- a/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/ctc_greedy_decoding.py @@ -19,7 +19,7 @@ from omegaconf import DictConfig, OmegaConf from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMethodConfig, ConfidenceMethodMixin from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import HypothesisType, LengthsType, LogprobsType, NeuralType from nemo.utils import logging @@ -55,7 +55,7 @@ def _states_to_device(dec_state, device='cpu'): return dec_state -class GreedyCTCInfer(Typing, ConfidenceMeasureMixin): +class GreedyCTCInfer(Typing, ConfidenceMethodMixin): """A greedy CTC decoder. Provides a common abstraction for sample level and batch level greedy decoding. @@ -71,15 +71,15 @@ class GreedyCTCInfer(Typing, ConfidenceMeasureMixin): preserve_frame_confidence: Bool flag which preserves the history of per-frame confidence scores generated during decoding. When set to true, the Hypothesis will contain the non-null value for `frame_confidence` in it. Here, `frame_confidence` is a List of floats. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -130,7 +130,7 @@ def __init__( preserve_alignments: bool = False, compute_timestamps: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__() @@ -140,8 +140,8 @@ def __init__( self.compute_timestamps = compute_timestamps | preserve_frame_confidence self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation measure - self._init_confidence_measure(confidence_measure_cfg) + # set confidence calculation method + self._init_confidence_method(confidence_method_cfg) @typecheck() def forward( @@ -253,27 +253,12 @@ class GreedyCTCInferConfig: preserve_alignments: bool = False compute_timestamps: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index dfa3ac27854b..185a3abf1151 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -35,7 +35,7 @@ from nemo.collections.asr.modules import rnnt_abstract from nemo.collections.asr.parts.utils import rnnt_utils -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMeasureConfig, ConfidenceMeasureMixin +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceMethodConfig, ConfidenceMethodMixin from nemo.collections.common.parts.rnn import label_collate from nemo.core.classes import Typing, typecheck from nemo.core.neural_types import AcousticEncodedRepresentation, ElementType, HypothesisType, LengthsType, NeuralType @@ -69,7 +69,7 @@ def _states_to_device(dec_state, device='cpu'): return dec_state -class _GreedyRNNTInfer(Typing, ConfidenceMeasureMixin): +class _GreedyRNNTInfer(Typing, ConfidenceMethodMixin): """A greedy transducer decoder. Provides a common abstraction for sample level and batch level greedy decoding. @@ -96,15 +96,15 @@ class _GreedyRNNTInfer(Typing, ConfidenceMeasureMixin): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -154,7 +154,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__() self.decoder = decoder_model @@ -166,8 +166,8 @@ def __init__( self.preserve_alignments = preserve_alignments self.preserve_frame_confidence = preserve_frame_confidence - # set confidence calculation measure - self._init_confidence_measure(confidence_measure_cfg) + # set confidence calculation method + self._init_confidence_method(confidence_method_cfg) def __call__(self, *args, **kwargs): return self.forward(*args, **kwargs) @@ -263,15 +263,15 @@ class GreedyRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -305,7 +305,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -314,7 +314,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) @typecheck() @@ -502,15 +502,15 @@ class GreedyBatchedRNNTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -544,7 +544,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -553,7 +553,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) # Depending on availability of `blank_as_pad` support @@ -1478,15 +1478,15 @@ class GreedyMultiblankRNNTInfer(GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1521,7 +1521,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1530,7 +1530,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.big_blank_durations = big_blank_durations self._SOS = blank_index - len(big_blank_durations) @@ -1682,15 +1682,15 @@ class GreedyBatchedMultiblankRNNTInfer(GreedyBatchedRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -1725,7 +1725,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -1734,7 +1734,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.big_blank_durations = big_blank_durations @@ -2203,31 +2203,15 @@ class GreedyRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) - self.confidence_method_cfg = "DEPRECATED" @dataclass @@ -2235,31 +2219,15 @@ class GreedyBatchedRNNTInferConfig: max_symbols_per_step: Optional[int] = 10 preserve_alignments: bool = False preserve_frame_confidence: bool = False - confidence_measure_cfg: Optional[ConfidenceMeasureConfig] = ConfidenceMeasureConfig() - confidence_method_cfg: str = "DEPRECATED" + confidence_method_cfg: Optional[ConfidenceMethodConfig] = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_measure_cfg - if isinstance(self.confidence_measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_measure_cfg) + self.confidence_method_cfg = OmegaConf.structured( + self.confidence_method_cfg + if isinstance(self.confidence_method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.confidence_method_cfg) ) - if self.confidence_method_cfg != "DEPRECATED": - logging.warning( - "`confidence_method_cfg` is deprecated and will be removed in the future. " - "Please use `confidence_measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `confidence_measure_cfg` with the value of `confidence_method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.confidence_measure_cfg = OmegaConf.structured( - self.confidence_method_cfg - if isinstance(self.confidence_method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.confidence_method_cfg) - ) - self.confidence_method_cfg = "DEPRECATED" class GreedyTDTInfer(_GreedyRNNTInfer): @@ -2288,15 +2256,15 @@ class GreedyTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -2331,7 +2299,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2340,7 +2308,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.durations = durations @@ -2544,15 +2512,15 @@ class GreedyBatchedTDTInfer(_GreedyRNNTInfer): The length of the list corresponds to the Acoustic Length (T). Each value in the list (Ti) is a torch.Tensor (U), representing 1 or more confidence scores. U is the number of target tokens for the current timestep Ti. - confidence_measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + confidence_method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -2587,7 +2555,7 @@ def __init__( max_symbols_per_step: Optional[int] = None, preserve_alignments: bool = False, preserve_frame_confidence: bool = False, - confidence_measure_cfg: Optional[DictConfig] = None, + confidence_method_cfg: Optional[DictConfig] = None, ): super().__init__( decoder_model=decoder_model, @@ -2596,7 +2564,7 @@ def __init__( max_symbols_per_step=max_symbols_per_step, preserve_alignments=preserve_alignments, preserve_frame_confidence=preserve_frame_confidence, - confidence_measure_cfg=confidence_measure_cfg, + confidence_method_cfg=confidence_method_cfg, ) self.durations = durations diff --git a/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py index 958195a4bb11..0e057e012542 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_benchmarking_utils.py @@ -173,11 +173,11 @@ def apply_confidence_parameters(decoding_cfg, hp): """ new_decoding_cfg = copy.deepcopy(decoding_cfg) confidence_cfg_fields = ("aggregation", "exclude_blank") - confidence_measure_cfg_fields = ("name", "alpha", "entropy_type", "entropy_norm") + confidence_method_cfg_fields = ("name", "alpha", "entropy_type", "entropy_norm") with open_dict(new_decoding_cfg): for p, v in hp.items(): if p in confidence_cfg_fields: new_decoding_cfg.confidence_cfg[p] = v - elif p in confidence_measure_cfg_fields: - new_decoding_cfg.confidence_cfg.measure_cfg[p] = v + elif p in confidence_method_cfg_fields: + new_decoding_cfg.confidence_cfg.method_cfg[p] = v return new_decoding_cfg diff --git a/nemo/collections/asr/parts/utils/asr_confidence_utils.py b/nemo/collections/asr/parts/utils/asr_confidence_utils.py index 29c49529a509..ddfac3744c6a 100644 --- a/nemo/collections/asr/parts/utils/asr_confidence_utils.py +++ b/nemo/collections/asr/parts/utils/asr_confidence_utils.py @@ -25,7 +25,7 @@ from nemo.utils import logging -class ConfidenceMeasureConstants: +class ConfidenceMethodConstants: NAMES = ("max_prob", "entropy") ENTROPY_TYPES = ("gibbs", "tsallis", "renyi") ENTROPY_NORMS = ("lin", "exp") @@ -48,17 +48,17 @@ def print(cls): @dataclass -class ConfidenceMeasureConfig: - """A Config which contains the measure name and settings to compute per-frame confidence scores. +class ConfidenceMethodConfig: + """A Config which contains the method name and settings to compute per-frame confidence scores. Args: - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. entropy_type: Which type of entropy to use (str). - Used if confidence_measure_cfg.name is set to `entropy`. + Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -92,31 +92,25 @@ class ConfidenceMeasureConfig: def __post_init__(self): if self.temperature != "DEPRECATED": - logging.warning( - "`temperature` is deprecated and will be removed in the future. Please use `alpha` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `alpha` with the value of `temperature`.") # self.temperature has type str self.alpha = float(self.temperature) self.temperature = "DEPRECATED" - if self.name not in ConfidenceMeasureConstants.NAMES: + if self.name not in ConfidenceMethodConstants.NAMES: raise ValueError( f"`name` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.NAMES) + '`'}. Provided: `{self.name}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.NAMES) + '`'}. Provided: `{self.name}`" ) - if self.entropy_type not in ConfidenceMeasureConstants.ENTROPY_TYPES: + if self.entropy_type not in ConfidenceMethodConstants.ENTROPY_TYPES: raise ValueError( f"`entropy_type` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_TYPES) + '`'}. Provided: `{self.entropy_type}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.ENTROPY_TYPES) + '`'}. Provided: `{self.entropy_type}`" ) if self.alpha <= 0.0: raise ValueError(f"`alpha` must be > 0. Provided: {self.alpha}") - if self.entropy_norm not in ConfidenceMeasureConstants.ENTROPY_NORMS: + if self.entropy_norm not in ConfidenceMethodConstants.ENTROPY_NORMS: raise ValueError( f"`entropy_norm` must be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.ENTROPY_NORMS) + '`'}. Provided: `{self.entropy_norm}`" + f"{'`' + '`, `'.join(ConfidenceMethodConstants.ENTROPY_NORMS) + '`'}. Provided: `{self.entropy_norm}`" ) @@ -142,15 +136,15 @@ class ConfidenceConfig: from the `token_confidence`. aggregation: Which aggregation type to use for collapsing per-token confidence into per-word confidence. Valid options are `mean`, `min`, `max`, `prod`. - measure_cfg: A dict-like object which contains the measure name and settings to compute per-frame + method_cfg: A dict-like object which contains the method name and settings to compute per-frame confidence scores. - name: The measure name (str). + name: The method name (str). Supported values: - 'max_prob' for using the maximum token probability as a confidence. - 'entropy' for using a normalized entropy of a log-likelihood vector. - entropy_type: Which type of entropy to use (str). Used if confidence_measure_cfg.name is set to `entropy`. + entropy_type: Which type of entropy to use (str). Used if confidence_method_cfg.name is set to `entropy`. Supported values: - 'gibbs' for the (standard) Gibbs entropy. If the alpha (α) is provided, the formula is the following: H_α = -sum_i((p^α_i)*log(p^α_i)). @@ -181,34 +175,19 @@ class ConfidenceConfig: preserve_word_confidence: bool = False exclude_blank: bool = True aggregation: str = "min" - measure_cfg: ConfidenceMeasureConfig = ConfidenceMeasureConfig() - method_cfg: str = "DEPRECATED" + method_cfg: ConfidenceMethodConfig = ConfidenceMethodConfig() def __post_init__(self): # OmegaConf.structured ensures that post_init check is always executed - self.measure_cfg = OmegaConf.structured( - self.measure_cfg - if isinstance(self.measure_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.measure_cfg) + self.method_cfg = OmegaConf.structured( + self.method_cfg + if isinstance(self.method_cfg, ConfidenceMethodConfig) + else ConfidenceMethodConfig(**self.method_cfg) ) - if self.method_cfg != "DEPRECATED": - logging.warning( - "`method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead." - ) - - # TODO (alaptev): delete the following two lines sometime in the future - logging.warning("Re-writing `measure_cfg` with the value of `method_cfg`.") - # OmegaConf.structured ensures that post_init check is always executed - self.measure_cfg = OmegaConf.structured( - self.method_cfg - if isinstance(self.method_cfg, ConfidenceMeasureConfig) - else ConfidenceMeasureConfig(**self.method_cfg) - ) - self.method_cfg = "DEPRECATED" if self.aggregation not in ConfidenceConstants.AGGREGATIONS: raise ValueError( f"`aggregation` has to be one of the following: " - f"{'`' + '`, `'.join(ConfidenceMeasureConstants.AGGREGATIONS) + '`'}. Provided: `{self.aggregation}`" + f"{'`' + '`, `'.join(ConfidenceConstants.AGGREGATIONS) + '`'}. Provided: `{self.aggregation}`" ) @@ -284,7 +263,7 @@ def entropy_gibbs_exp(x, v, t): def get_confidence_aggregation_bank(): """Generate a dictionary with confidence aggregation functions. - Supported confidence measures: + Supported confidence aggregation functions: min: minimum max: maximum mean: arithmetic mean @@ -305,26 +284,26 @@ def get_confidence_aggregation_bank(): return confidence_aggregation_bank -class ConfidenceMeasureMixin(ABC): - """Confidence Measure Mixin class. +class ConfidenceMethodMixin(ABC): + """Confidence Method Mixin class. - It initializes per-frame confidence measure. + It initializes per-frame confidence method. """ - def _init_confidence_measure(self, confidence_measure_cfg: Optional[DictConfig] = None): - """Initialize per-frame confidence measure from config. + def _init_confidence_method(self, confidence_method_cfg: Optional[DictConfig] = None): + """Initialize per-frame confidence method from config. """ # OmegaConf.structured ensures that post_init check is always executed - confidence_measure_cfg = OmegaConf.structured( - ConfidenceMeasureConfig() - if confidence_measure_cfg is None - else ConfidenceMeasureConfig(**confidence_measure_cfg) + confidence_method_cfg = OmegaConf.structured( + ConfidenceMethodConfig() + if confidence_method_cfg is None + else ConfidenceMethodConfig(**confidence_method_cfg) ) - # set confidence calculation measure + # set confidence calculation method # we suppose that self.blank_id == len(vocabulary) self.num_tokens = (self.blank_id if hasattr(self, "blank_id") else self._blank_index) + 1 - self.alpha = confidence_measure_cfg.alpha + self.alpha = confidence_method_cfg.alpha # init confidence measure bank self.confidence_measure_bank = get_confidence_measure_bank() @@ -332,14 +311,14 @@ def _init_confidence_measure(self, confidence_measure_cfg: Optional[DictConfig] measure = None # construct measure_name measure_name = "" - if confidence_measure_cfg.name == "max_prob": + if confidence_method_cfg.name == "max_prob": measure_name = "max_prob" - elif confidence_measure_cfg.name == "entropy": + elif confidence_method_cfg.name == "entropy": measure_name = '_'.join( - [confidence_measure_cfg.name, confidence_measure_cfg.entropy_type, confidence_measure_cfg.entropy_norm] + [confidence_method_cfg.name, confidence_method_cfg.entropy_type, confidence_method_cfg.entropy_norm] ) else: - raise ValueError(f"Unsupported `confidence_measure_cfg.name`: `{confidence_measure_cfg.name}`") + raise ValueError(f"Unsupported `confidence_method_cfg.name`: `{confidence_method_cfg.name}`") if measure_name not in self.confidence_measure_bank: raise ValueError(f"Unsupported measure setup: `{measure_name}`") measure = partial(self.confidence_measure_bank[measure_name], v=self.num_tokens, t=self.alpha) @@ -359,7 +338,7 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): confidence_cfg = OmegaConf.structured( ConfidenceConfig() if confidence_cfg is None else ConfidenceConfig(**confidence_cfg) ) - self.confidence_measure_cfg = confidence_cfg.measure_cfg + self.confidence_method_cfg = confidence_cfg.method_cfg # extract the config self.preserve_word_confidence = confidence_cfg.get('preserve_word_confidence', False) @@ -384,11 +363,11 @@ def _init_confidence(self, confidence_cfg: Optional[DictConfig] = None): if self.cfg.strategy in ['greedy', 'greedy_batch']: self.preserve_frame_confidence = self.cfg.greedy.get('preserve_frame_confidence', False) # OmegaConf.structured ensures that post_init check is always executed - confidence_measure_cfg = OmegaConf.structured(self.cfg.greedy).get('confidence_measure_cfg', None) - self.confidence_measure_cfg = ( - OmegaConf.structured(ConfidenceMeasureConfig()) - if confidence_measure_cfg is None - else OmegaConf.structured(ConfidenceMeasureConfig(**confidence_measure_cfg)) + confidence_method_cfg = OmegaConf.structured(self.cfg.greedy).get('confidence_method_cfg', None) + self.confidence_method_cfg = ( + OmegaConf.structured(ConfidenceMethodConfig()) + if confidence_method_cfg is None + else OmegaConf.structured(ConfidenceMethodConfig(**confidence_method_cfg)) ) @abstractmethod diff --git a/scripts/confidence_ensembles/build_ensemble.py b/scripts/confidence_ensembles/build_ensemble.py index bc32a4f99840..99bfa6187b30 100644 --- a/scripts/confidence_ensembles/build_ensemble.py +++ b/scripts/confidence_ensembles/build_ensemble.py @@ -97,7 +97,7 @@ ) from nemo.collections.asr.parts.utils.asr_confidence_utils import ( ConfidenceConfig, - ConfidenceMeasureConfig, + ConfidenceMethodConfig, get_confidence_aggregation_bank, get_confidence_measure_bank, ) @@ -214,7 +214,7 @@ class BuildEnsembleConfig: preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) temperature: float = 1.0 diff --git a/scripts/confidence_ensembles/ensemble_config.yaml b/scripts/confidence_ensembles/ensemble_config.yaml index 590318ee3b28..8184d4d5acb5 100644 --- a/scripts/confidence_ensembles/ensemble_config.yaml +++ b/scripts/confidence_ensembles/ensemble_config.yaml @@ -16,7 +16,7 @@ temperature: 1.0 confidence: exclude_blank: True aggregation: mean - measure_cfg: + method_cfg: name: entropy entropy_type: renyi alpha: 0.25 diff --git a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py index 8922fe09176d..246aa61c2c0e 100644 --- a/scripts/speech_recognition/confidence/benchmark_asr_confidence.py +++ b/scripts/speech_recognition/confidence/benchmark_asr_confidence.py @@ -83,11 +83,11 @@ def get_experiment_params(cfg): """ blank = "no_blank" if cfg.exclude_blank else "blank" aggregation = cfg.aggregation - method_name = cfg.measure_cfg.name - alpha = cfg.measure_cfg.alpha + method_name = cfg.method_cfg.name + alpha = cfg.method_cfg.alpha if method_name == "entropy": - entropy_type = cfg.measure_cfg.entropy_type - entropy_norm = cfg.measure_cfg.entropy_norm + entropy_type = cfg.method_cfg.entropy_type + entropy_norm = cfg.method_cfg.entropy_norm experiment_param_list = [ aggregation, str(cfg.exclude_blank), diff --git a/tests/collections/asr/confidence/test_asr_confidence.py b/tests/collections/asr/confidence/test_asr_confidence.py index 11b127424908..e95a0bd8127b 100644 --- a/tests/collections/asr/confidence/test_asr_confidence.py +++ b/tests/collections/asr/confidence/test_asr_confidence.py @@ -106,32 +106,21 @@ def test_run_confidence_benchmark( @pytest.mark.integration @pytest.mark.with_downloads @pytest.mark.parametrize('model_name', ("ctc", "rnnt")) - @pytest.mark.parametrize('arg', ("method_cfg", "temperature", "all")) - def test_deprecated_config_args(self, model_name, arg, conformer_ctc_bpe_model, conformer_rnnt_bpe_model): - assert ConfidenceConfig().measure_cfg.alpha == 0.33, "default `alpha` is supposed to be 0.33" + def test_deprecated_config_args(self, model_name, conformer_ctc_bpe_model, conformer_rnnt_bpe_model): + assert ConfidenceConfig().method_cfg.alpha == 0.33, "default `alpha` is supposed to be 0.33" model = conformer_ctc_bpe_model if model_name == "ctc" else conformer_rnnt_bpe_model assert isinstance(model, ASRModel) - if arg == "all": - conf = OmegaConf.create({"temperature": 0.5}) - test_args_main = {"method_cfg": conf} - test_args_greedy = {"confidence_method_cfg": conf} - elif arg == "method_cfg": - conf = OmegaConf.create({"alpha": 0.5}) - test_args_main = {"method_cfg": conf} - test_args_greedy = {"confidence_method_cfg": conf} - elif arg == "temperature": - conf = OmegaConf.create({"temperature": 0.5}) - test_args_main = {"measure_cfg": conf} - test_args_greedy = {"confidence_measure_cfg": conf} - else: - raise NotImplementedError(arg) + + conf = OmegaConf.create({"temperature": 0.5}) + test_args_main = {"method_cfg": conf} + test_args_greedy = {"confidence_method_cfg": conf} confidence_cfg = ConfidenceConfig(preserve_word_confidence=True, **test_args_main) model.change_decoding_strategy( RNNTDecodingConfig(fused_batch_size=-1, strategy="greedy", confidence_cfg=confidence_cfg) if model_name == "rnnt" else CTCDecodingConfig(confidence_cfg=confidence_cfg) ) - assert model.cfg.decoding.confidence_cfg.measure_cfg.alpha == 0.5 + assert model.cfg.decoding.confidence_cfg.method_cfg.alpha == 0.5 model.change_decoding_strategy( RNNTDecodingConfig( fused_batch_size=-1, @@ -141,4 +130,4 @@ def test_deprecated_config_args(self, model_name, arg, conformer_ctc_bpe_model, if model_name == "rnnt" else CTCDecodingConfig(greedy=GreedyCTCInferConfig(preserve_frame_confidence=True, **test_args_greedy)) ) - assert model.cfg.decoding.greedy.confidence_measure_cfg.alpha == 0.5 + assert model.cfg.decoding.greedy.confidence_method_cfg.alpha == 0.5 diff --git a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py index 8687ed683833..22926b6516ee 100644 --- a/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py +++ b/tests/collections/asr/test_asr_hybrid_rnnt_ctc_model_char.py @@ -242,8 +242,7 @@ def test_decoding_change(self, hybrid_asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -257,8 +256,7 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_asr_rnnt_encdec_model.py b/tests/collections/asr/test_asr_rnnt_encdec_model.py index 775a146c74c4..68f1e38f797b 100644 --- a/tests/collections/asr/test_asr_rnnt_encdec_model.py +++ b/tests/collections/asr/test_asr_rnnt_encdec_model.py @@ -242,8 +242,7 @@ def test_decoding_change(self, asr_model): @pytest.mark.unit def test_GreedyRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyRNNTInfer, greedy_decode.GreedyRNNTInferConfig, ignore_args=IGNORE_ARGS @@ -257,8 +256,7 @@ def test_GreedyRNNTInferConfig(self): @pytest.mark.unit def test_GreedyBatchedRNNTInferConfig(self): - # confidence_method_cfg is deprecated - IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index', 'confidence_method_cfg'] + IGNORE_ARGS = ['decoder_model', 'joint_model', 'blank_index'] result = assert_dataclass_signature_match( greedy_decode.GreedyBatchedRNNTInfer, greedy_decode.GreedyBatchedRNNTInferConfig, ignore_args=IGNORE_ARGS diff --git a/tests/collections/asr/test_confidence_ensembles.py b/tests/collections/asr/test_confidence_ensembles.py index b8b027dd3426..e926475009e2 100644 --- a/tests/collections/asr/test_confidence_ensembles.py +++ b/tests/collections/asr/test_confidence_ensembles.py @@ -19,7 +19,7 @@ from nemo.collections.asr.metrics.wer import CTCDecodingConfig from nemo.collections.asr.models import EncDecCTCModel, EncDecHybridRNNTCTCModel, EncDecRNNTModel from nemo.collections.asr.models.confidence_ensemble import ConfidenceEnsembleModel -from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMeasureConfig +from nemo.collections.asr.parts.utils.asr_confidence_utils import ConfidenceConfig, ConfidenceMethodConfig def get_model_config(model_class): @@ -117,7 +117,7 @@ def test_model_creation_2models(self, tmp_path, model_class0, model_class1): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model @@ -148,7 +148,7 @@ def test_model_creation_5models(self, tmp_path): preserve_frame_confidence=True, exclude_blank=True, aggregation="mean", - measure_cfg=ConfidenceMeasureConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), + method_cfg=ConfidenceMethodConfig(name="entropy", entropy_type="renyi", alpha=0.25, entropy_norm="lin",), ) # just checking that no errors are raised when creating the model diff --git a/tutorials/asr/ASR_Confidence_Estimation.ipynb b/tutorials/asr/ASR_Confidence_Estimation.ipynb index 2a1ad024a889..7a92ed026f07 100644 --- a/tutorials/asr/ASR_Confidence_Estimation.ipynb +++ b/tutorials/asr/ASR_Confidence_Estimation.ipynb @@ -443,8 +443,8 @@ "from nemo.collections.asr.parts.utils.asr_confidence_utils import (\n", " ConfidenceConfig,\n", " ConfidenceConstants,\n", - " ConfidenceMeasureConfig,\n", - " ConfidenceMeasureConstants,\n", + " ConfidenceMethodConfig,\n", + " ConfidenceMethodConstants,\n", ")\n", "from nemo.collections.asr.parts.utils.asr_confidence_benchmarking_utils import (\n", " apply_confidence_parameters,\n", @@ -454,11 +454,11 @@ ")\n", "\n", "\n", - "# List allowed options for ConfidenceMeasureConfig and ConfidenceConfig\n", - "print(f\"Allowed options for ConfidenceMeasureConfig: {ConfidenceMeasureConstants.print()}\\n\")\n", + "# List allowed options for ConfidenceMethodConfig and ConfidenceConfig\n", + "print(f\"Allowed options for ConfidenceMethodConfig: {ConfidenceMethodConstants.print()}\\n\")\n", "print(f\"Allowed options for ConfidenceConfig: {ConfidenceConstants.print()}\\n\")\n", "\n", - "# Initialize ConfidenceConfig and ConfidenceMeasureConfig\n", + "# Initialize ConfidenceConfig and ConfidenceMethodConfig\n", "confidence_cfg = ConfidenceConfig(\n", " preserve_frame_confidence=True, # Internally set to true if preserve_token_confidence == True\n", " # or preserve_word_confidence == True\n", @@ -466,7 +466,7 @@ " preserve_word_confidence=True,\n", " aggregation=\"prod\", # How to aggregate frame scores to token scores and token scores to word scores\n", " exclude_blank=False, # If true, only non-blank emissions contribute to confidence scores\n", - " measure_cfg=ConfidenceMeasureConfig( # Config for per-frame scores calculation (before aggregation)\n", + " method_cfg=ConfidenceMethodConfig( # Config for per-frame scores calculation (before aggregation)\n", " name=\"max_prob\", # Or \"entropy\" (default), which usually works better\n", " entropy_type=\"gibbs\", # Used only for name == \"entropy\". Recommended: \"tsallis\" (default) or \"renyi\"\n", " alpha=0.5, # Low values (<1) increase sensitivity, high values decrease sensitivity\n", @@ -1058,7 +1058,7 @@ " preserve_word_confidence=True,\n", " preserve_token_confidence=True,\n", " aggregation=\"min\",\n", - " measure_cfg=DictConfig({\"entropy_type\": \"tsallis\", \"alpha\": 1.5, \"entropy_norm\": \"lin\"}),\n", + " method_cfg=DictConfig({\"entropy_type\": \"tsallis\", \"alpha\": 1.5, \"entropy_norm\": \"lin\"}),\n", ")\n", "\n", "model.change_decoding_strategy(\n",