Skip to content

Commit

Permalink
Fix union type annotations for autodoc+mock-import rendering (#8956)
Browse files Browse the repository at this point in the history
Signed-off-by: Piotr Żelasko <[email protected]>
Co-authored-by: Pablo Garay <[email protected]>
Co-authored-by: Elena Rastorgueva <[email protected]>
Signed-off-by: Ao Tang <[email protected]>
  • Loading branch information
3 people authored and suiyoubi committed May 2, 2024
1 parent 27eb531 commit fcdd175
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 37 deletions.
2 changes: 1 addition & 1 deletion docs/source/asr/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Model Classes

.. _confidence-ensembles-api:

.. autoclass:: nemo.collections.asr.models.confidence_ensembles.ConfidenceEnsembleModel
.. autoclass:: nemo.collections.asr.models.confidence_ensemble.ConfidenceEnsembleModel
:show-inheritance:
:members: transcribe

Expand Down
4 changes: 4 additions & 0 deletions docs/source/asr/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -806,13 +806,17 @@ We recommend to pre-compute the bucket duration bins in order to accelerate the
The following script may be used:

.. code-block:: bash

    $ python scripts/speech_recognition/estimate_duration_bins.py -b 30 manifest.json
Use the following options in your config:
num_buckets=30
bucket_duration_bins=[1.78,2.34,2.69,...
<other diagnostic information about the dataset>
For multi-dataset setups, one may provide multiple manifests and even their weights:
.. code-block:: bash

    $ python scripts/speech_recognition/estimate_duration_bins.py -b 30 [[manifest.json,0.7],[other.json,0.3]]
Use the following options in your config:
num_buckets=30
Expand Down
2 changes: 1 addition & 1 deletion docs/source/asr/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ For more details about this model, see the `paper <https://arxiv.org/abs/2306.15
or read our `tutorial <https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/asr/Confidence_Ensembles.ipynb>`_.

NeMo supports Confidence-based Ensembles through the
:ref:`nemo.collections.asr.models.confidence_ensembles.ConfidenceEnsembleModel <confidence-ensembles-api>` class.
:ref:`nemo.collections.asr.models.confidence_ensemble.ConfidenceEnsembleModel <confidence-ensembles-api>` class.

A typical workflow to create and use the ensemble is as follows:

Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/common/data/lhotse/cutset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from functools import partial
from itertools import repeat
from pathlib import Path
from typing import Sequence, Tuple
from typing import Sequence, Tuple, Union

from lhotse import CutSet, Features, Recording
from lhotse.array import Array, TemporalArray
Expand Down Expand Up @@ -200,7 +200,7 @@ def attach_tags(cut, tags: dict):


def parse_and_combine_datasets(
config_list: list[DictConfig] | ListConfig, propagate_attrs: dict
config_list: Union[list[DictConfig], ListConfig], propagate_attrs: dict
) -> tuple[CutSet, bool]:
cuts = []
weights = []
Expand Down
41 changes: 14 additions & 27 deletions nemo/collections/common/data/lhotse/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

import warnings
from dataclasses import dataclass
from functools import partial, singledispatch
from typing import Any, Optional
from functools import partial
from typing import Any, Optional, TypeVar, Union

import numpy as np
import torch
Expand Down Expand Up @@ -375,37 +375,24 @@ def measure_length(self, example: Any) -> float:
raise RuntimeError(f"Unsupported example type: {type(example)}")


# The functions below are overloads for different types of examples.
# This is required for multi-modal dataloading since we will iterate
# over a union type now.


def is_text(example) -> bool:
return isinstance(example, (TextExample, TextPairExample))


@singledispatch
def tokenize(example, tokenizer):
raise RuntimeError(f"Unsupported type of example: {type(example)}")


@tokenize.register
def _(example: Cut, tokenizer) -> Cut:
for s in example.supervisions:
s.tokens = np.asarray(tokenizer(s.text, s.language))
return example
Example = TypeVar("Example", bound=Union[Cut, TextExample, TextPairExample])


@tokenize.register
def _(example: TextExample, tokenizer) -> TextExample:
example.tokens = np.asarray(tokenizer(example.text, example.language))
return example


@tokenize.register
def _(example: TextPairExample, tokenizer) -> TextPairExample:
example.source.tokens = np.asarray(tokenizer(example.source.text, example.source.language))
example.target.tokens = np.asarray(tokenizer(example.source.text, example.target.language))
def tokenize(example: Example, tokenizer) -> Example:
    """Tokenize the text in ``example`` in-place and return it.

    Handles the three example types used in multimodal dataloading:
    ``Cut`` (audio with supervision segments), ``TextExample``, and
    ``TextPairExample``.

    Args:
        example: The example whose text field(s) will be tokenized.
        tokenizer: A callable invoked as ``tokenizer(text, language)`` that
            returns a sequence of token ids.

    Returns:
        The same ``example`` object, with ``tokens`` attributes populated
        (as ``np.ndarray``) on the relevant field(s).

    Raises:
        RuntimeError: If ``example`` is of an unsupported type.
    """
    if isinstance(example, Cut):
        # A Cut may carry several supervision segments; tokenize each one.
        for s in example.supervisions:
            s.tokens = np.asarray(tokenizer(s.text, s.language))
    elif isinstance(example, TextExample):
        example.tokens = np.asarray(tokenizer(example.text, example.language))
    elif isinstance(example, TextPairExample):
        example.source.tokens = np.asarray(tokenizer(example.source.text, example.source.language))
        # Fix: tokenize the *target* text for the target side. The previous
        # code passed ``example.source.text`` here — a copy-paste bug that
        # made target tokens duplicate the source text.
        example.target.tokens = np.asarray(tokenizer(example.target.text, example.target.language))
    else:
        raise RuntimeError(f"Unsupported type of example: {type(example)}")
    return example


Expand Down
12 changes: 6 additions & 6 deletions nemo/collections/common/data/lhotse/text_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import random
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, Literal
from typing import Iterator, Literal, Union

from lhotse.cut.text import TextExample, TextPairExample
from lhotse.dataset.dataloading import resolve_seed
Expand All @@ -31,10 +31,10 @@ class LhotseTextAdapter:
each line into a ``TextExample``.
"""

paths: Pathlike | list[Pathlike]
paths: Union[Pathlike, list[Pathlike]]
language: str | None = None
shuffle_shards: bool = False
shard_seed: int | Literal["trng", "randomized"] = "trng"
shard_seed: Union[int, Literal["trng", "randomized"]] = "trng"

def __post_init__(self):
self.paths = expand_sharded_filepaths(self.paths)
Expand Down Expand Up @@ -62,12 +62,12 @@ class LhotseTextPairAdapter:
with Lhotse together with training examples in audio modality.
"""

source_paths: Pathlike | list[Pathlike]
target_paths: Pathlike | list[Pathlike]
source_paths: Union[Pathlike, list[Pathlike]]
target_paths: Union[Pathlike, list[Pathlike]]
source_language: str | None = None
target_language: str | None = None
shuffle_shards: bool = False
shard_seed: int | Literal["trng", "randomized"] = "trng"
shard_seed: Union[int, Literal["trng", "randomized"]] = "trng"

def __post_init__(self):
ASSERT_MSG = "Both source and target must be a single path or lists of paths"
Expand Down

0 comments on commit fcdd175

Please sign in to comment.