diff --git a/Jenkinsfile b/Jenkinsfile index 27aca951a5bd..5916ed6e797a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1035,7 +1035,7 @@ pipeline { steps { sh 'cd examples/tts/g2p && \ TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR=output_${TIME} && \ - python heteronym_classification_train_and_evaluate.py \ + python g2p_heteronym_classification_train_and_evaluate.py \ train_manifest=/home/TestData/g2p/manifest.json \ validation_manifest=/home/TestData/g2p/manifest.json \ test_manifest=/home/TestData/g2p/manifest.json \ @@ -1047,7 +1047,7 @@ pipeline { exp_manager.exp_dir=${OUTPUT_DIR} \ +exp_manager.use_datetime_version=False\ +exp_manager.version=test && \ - python heteronym_classification_inference.py \ + python g2p_heteronym_classification_inference.py \ manifest=/home/TestData/g2p/manifest.json \ pretrained_model=${OUTPUT_DIR}/HeteronymClassification/test/checkpoints/HeteronymClassification.nemo \ output_manifest=preds.json' diff --git a/docs/source/tts/g2p.rst b/docs/source/tts/g2p.rst index 5b04db59d7da..53a95eec8ffa 100644 --- a/docs/source/tts/g2p.rst +++ b/docs/source/tts/g2p.rst @@ -59,7 +59,7 @@ To train ByT5 G2P model and evaluate it after at the end of the training, run: do_training=True \ do_testing=True -Example of the config file: ``NeMo/examples/text_processing/g2p/conf/t5_g2p.yaml``. +Example of the config file: ``NeMo/examples/tts/g2p/conf/g2p_t5.yaml``. To train G2P-Conformer model and evaluate it after at the end of the training, run: @@ -168,7 +168,7 @@ To train the model, run: .. code-block:: - python heteronym_classification_train_and_evaluate.py \ + python g2p_heteronym_classification_train_and_evaluate.py \ train_manifest=" \ validation_manifest=" \ model.wordids= \ @@ -179,7 +179,7 @@ To train the model and evaluate it when the training is complete, run: .. 
code-block:: - python heteronym_classification_train_and_evaluate.py \ + python g2p_heteronym_classification_train_and_evaluate.py \ train_manifest=" \ validation_manifest=" \ model.test_ds.dataset.manifest=" \ @@ -191,7 +191,7 @@ To evaluate pretrained model, run: .. code-block:: - python heteronym_classification_train_and_evaluate.py \ + python g2p_heteronym_classification_train_and_evaluate.py \ do_training=False \ do_testing=True \ model.test_ds.dataset.manifest=" \ @@ -201,7 +201,7 @@ To run inference with a pretrained HeteronymClassificationModel, run: .. code-block:: - python heteronym_classification_inference.py \ + python g2p_heteronym_classification_inference.py \ manifest="" \ pretrained_model="" \ output_file="" diff --git a/examples/tts/g2p/conf/heteronym_classification.yaml b/examples/tts/g2p/conf/g2p_heteronym_classification.yaml similarity index 100% rename from examples/tts/g2p/conf/heteronym_classification.yaml rename to examples/tts/g2p/conf/g2p_heteronym_classification.yaml diff --git a/examples/tts/g2p/conf/t5_g2p.yaml b/examples/tts/g2p/conf/g2p_t5.yaml similarity index 100% rename from examples/tts/g2p/conf/t5_g2p.yaml rename to examples/tts/g2p/conf/g2p_t5.yaml diff --git a/examples/tts/g2p/heteronym_classification_inference.py b/examples/tts/g2p/g2p_heteronym_classification_inference.py similarity index 97% rename from examples/tts/g2p/heteronym_classification_inference.py rename to examples/tts/g2p/g2p_heteronym_classification_inference.py index 931f01439d3d..5980f8de1fa1 100644 --- a/examples/tts/g2p/heteronym_classification_inference.py +++ b/examples/tts/g2p/g2p_heteronym_classification_inference.py @@ -22,7 +22,7 @@ import torch from omegaconf import OmegaConf -from nemo.collections.tts.g2p.models.heteronym_classification import HeteronymClassificationModel +from nemo.collections.tts.models.g2p_heteronym_classification import HeteronymClassificationModel from nemo.core.config import hydra_runner from nemo.utils import logging @@ -34,7 
+34,7 @@ Inference form manifest: -python heteronym_classification_inference.py \ +python g2p_heteronym_classification_inference.py \ manifest="" \ pretrained_model="" \ output_manifest="" \ @@ -42,7 +42,7 @@ Interactive inference: -python heteronym_classification_inference.py \ +python g2p_heteronym_classification_inference.py \ pretrained_model="" \ wordid_to_phonemes_file="" # Optional diff --git a/examples/tts/g2p/heteronym_classification_train_and_evaluate.py b/examples/tts/g2p/g2p_heteronym_classification_train_and_evaluate.py similarity index 93% rename from examples/tts/g2p/heteronym_classification_train_and_evaluate.py rename to examples/tts/g2p/g2p_heteronym_classification_train_and_evaluate.py index 171760f1cd3d..492c87fefc95 100644 --- a/examples/tts/g2p/heteronym_classification_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_heteronym_classification_train_and_evaluate.py @@ -18,7 +18,7 @@ import torch from nemo.collections.common.callbacks import LogEpochTimeCallback -from nemo.collections.tts.g2p.models.heteronym_classification import HeteronymClassificationModel +from nemo.collections.tts.models.g2p_heteronym_classification import HeteronymClassificationModel from nemo.core.config import hydra_runner from nemo.utils import logging from nemo.utils.exp_manager import exp_manager @@ -29,14 +29,14 @@ To prepare dataset, see NeMo/scripts/dataset_processing/g2p/export_wikihomograph_data_to_manifest.py To run training: -python heteronym_classification_train_and_evaluate.py \ +python g2p_heteronym_classification_train_and_evaluate.py \ train_manifest=" \ validation_manifest=" \ model.wordids="" \ do_training=True To run training and testing (once the training is complete): -python heteronym_classification_train_and_evaluate.py \ +python g2p_heteronym_classification_train_and_evaluate.py \ train_manifest=" \ validation_manifest=" \ model.test_ds.dataset.manifest=" \ @@ -45,7 +45,7 @@ do_testing=True To run testing: -python 
heteronym_classification_train_and_evaluate.py \ +python g2p_heteronym_classification_train_and_evaluate.py \ do_training=False \ do_testing=True \ model.test_ds.dataset.manifest=" \ @@ -60,7 +60,7 @@ """ -@hydra_runner(config_path="conf", config_name="heteronym_classification.yaml") +@hydra_runner(config_path="conf", config_name="g2p_heteronym_classification.yaml") def main(cfg): trainer = pl.Trainer(**cfg.trainer) exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/examples/tts/g2p/g2p_inference.py b/examples/tts/g2p/g2p_inference.py index 4b8f48ef1319..e7bffa888653 100644 --- a/examples/tts/g2p/g2p_inference.py +++ b/examples/tts/g2p/g2p_inference.py @@ -21,7 +21,7 @@ from omegaconf import OmegaConf from utils import get_metrics -from nemo.collections.tts.g2p.models.g2p_model import G2PModel +from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging diff --git a/examples/tts/g2p/g2p_train_and_evaluate.py b/examples/tts/g2p/g2p_train_and_evaluate.py index d7a98a4d77ad..ff7b2b0675ea 100644 --- a/examples/tts/g2p/g2p_train_and_evaluate.py +++ b/examples/tts/g2p/g2p_train_and_evaluate.py @@ -19,14 +19,14 @@ from utils import get_model from nemo.collections.common.callbacks import LogEpochTimeCallback -from nemo.collections.tts.g2p.models.g2p_model import G2PModel +from nemo.collections.tts.models.base import G2PModel from nemo.core.config import hydra_runner from nemo.utils import logging, model_utils from nemo.utils.exp_manager import exp_manager """ This script supports training of G2PModels -(for T5G2PModel use t5_g2p.yaml, for CTCG2PModel use either g2p_conformer.yaml or g2p_t5_ctc.yaml) +(for T5G2PModel use g2p_t5.yaml, for CTCG2PModel use either g2p_conformer.yaml or g2p_t5_ctc.yaml) # Training T5G2PModel and evaluation at the end of training: python examples/text_processing/g2p/g2p_train_and_evaluate.py \ @@ -38,7 +38,7 @@ do_training=True \ do_testing=True - Example of the 
config file: NeMo/examples/text_processing/g2p/conf/t5_g2p.yaml + Example of the config file: NeMo/examples/tts/g2p/conf/g2p_t5.yaml # Training Conformer-G2P Model and evaluation at the end of training: python examples/text_processing/g2p/g2p_train_and_evaluate.py \ @@ -64,7 +64,7 @@ """ -@hydra_runner(config_path="conf", config_name="t5_g2p") +@hydra_runner(config_path="conf", config_name="g2p_t5") def main(cfg): trainer = pl.Trainer(**cfg.trainer) exp_manager(trainer, cfg.get("exp_manager", None)) diff --git a/examples/tts/g2p/utils.py b/examples/tts/g2p/utils.py index 143259356ce6..a25b79ef075d 100644 --- a/examples/tts/g2p/utils.py +++ b/examples/tts/g2p/utils.py @@ -15,8 +15,8 @@ import json from nemo.collections.asr.metrics.wer import word_error_rate -from nemo.collections.tts.g2p.models import CTCG2PModel -from nemo.collections.tts.g2p.models.t5_g2p import T5G2PModel +from nemo.collections.tts.models.g2p_ctc import CTCG2PModel +from nemo.collections.tts.models.g2p_t5 import T5G2PModel from nemo.utils import logging diff --git a/nemo/collections/tts/g2p/data/__init__.py b/nemo/collections/tts/g2p/data/__init__.py deleted file mode 100644 index a1cf281f0908..000000000000 --- a/nemo/collections/tts/g2p/data/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/nemo/collections/tts/g2p/models/__init__.py b/nemo/collections/tts/g2p/models/__init__.py deleted file mode 100644 index e1eeee3b2c33..000000000000 --- a/nemo/collections/tts/g2p/models/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.collections.tts.g2p.models.ctc_g2p import CTCG2PModel -from nemo.collections.tts.g2p.models.t5_g2p import T5G2PModel - -__all__ = ["T5G2PModel", "CTCG2PModel"] diff --git a/nemo/collections/tts/g2p/models/g2p_model.py b/nemo/collections/tts/g2p/models/g2p_model.py deleted file mode 100644 index 7839221f108b..000000000000 --- a/nemo/collections/tts/g2p/models/g2p_model.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -from abc import ABC -from typing import List, Optional - -import torch -from omegaconf import DictConfig -from tqdm import tqdm - -from nemo.core.classes import ModelPT -from nemo.utils import logging, model_utils - -__all__ = ["G2PModel"] - - -class G2PModel(ModelPT, ABC): - @torch.no_grad() - def convert_graphemes_to_phonemes( - self, - manifest_filepath: str, - output_manifest_filepath: str, - grapheme_field: str = "text_graphemes", - batch_size: int = 32, - num_workers: int = 0, - pred_field: Optional[str] = "pred_text", - ) -> List[str]: - - """ - Main function for Inference. Converts grapheme entries from the manifest "graheme_field" to phonemes - Args: - manifest_filepath: Path to .json manifest file - output_manifest_filepath: Path to .json manifest file to save predictions, will be saved in "target_field" - grapheme_field: name of the field in manifest_filepath for input grapheme text - pred_field: name of the field in the output_file to save predictions - batch_size: int = 32 # Batch size to use for inference - num_workers: int = 0 # Number of workers to use for DataLoader during inference - - Returns: Predictions generated by the model - """ - config = { - "manifest_filepath": manifest_filepath, - "grapheme_field": grapheme_field, - "drop_last": False, - "shuffle": False, - "batch_size": batch_size, - "num_workers": num_workers, - } - - all_preds = self._infer(DictConfig(config)) - with open(manifest_filepath, "r") as f_in: - with open(output_manifest_filepath, 'w', encoding="utf-8") as f_out: - for i, line in tqdm(enumerate(f_in)): - line = json.loads(line) - line[pred_field] = all_preds[i] - f_out.write(json.dumps(line, ensure_ascii=False) + "\n") - - logging.info(f"Predictions saved to {output_manifest_filepath}.") - return all_preds - - @classmethod - def list_available_models(cls) -> 'List[PretrainedModelInfo]': - """ - This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud. 
- Returns: - List of available pre-trained models. - """ - # recursively walk the subclasses to generate pretrained model info - list_of_models = model_utils.resolve_subclass_pretrained_model_info(cls) - return list_of_models diff --git a/nemo/collections/tts/g2p/modules.py b/nemo/collections/tts/g2p/modules.py index 524ca860a87f..b3aeefb5d5aa 100644 --- a/nemo/collections/tts/g2p/modules.py +++ b/nemo/collections/tts/g2p/modules.py @@ -79,7 +79,7 @@ def setup_heteronym_model( """ try: - from nemo.collections.tts.g2p.models.heteronym_classification import HeteronymClassificationModel + from nemo.collections.tts.models.g2p_heteronym_classification import HeteronymClassificationModel self.heteronym_model = heteronym_model self.heteronym_model.set_wordid_to_phonemes(wordid_to_phonemes_file) diff --git a/nemo/collections/tts/models/base.py b/nemo/collections/tts/models/base.py index 4f7a6a7c7640..8ef147b9b145 100644 --- a/nemo/collections/tts/models/base.py +++ b/nemo/collections/tts/models/base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,17 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +import json from abc import ABC, abstractmethod from contextlib import ExitStack, contextmanager -from typing import List +from typing import List, Optional import torch +from omegaconf import DictConfig +from tqdm import tqdm from nemo.collections.tts.parts.utils.helpers import OperationMode from nemo.core.classes import ModelPT from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types.elements import AudioSignal from nemo.core.neural_types.neural_type import NeuralType +from nemo.utils import logging, model_utils class SpectrogramGenerator(ModelPT, ABC): @@ -269,3 +274,59 @@ def list_available_models(cls) -> 'List[PretrainedModelInfo]': if subclass_models is not None and len(subclass_models) > 0: list_of_models.extend(subclass_models) return list_of_models + + +class G2PModel(ModelPT, ABC): + @torch.no_grad() + def convert_graphemes_to_phonemes( + self, + manifest_filepath: str, + output_manifest_filepath: str, + grapheme_field: str = "text_graphemes", + batch_size: int = 32, + num_workers: int = 0, + pred_field: Optional[str] = "pred_text", + ) -> List[str]: + + """ + Main function for Inference. 
Converts grapheme entries from the manifest "grapheme_field" to phonemes + Args: + manifest_filepath: Path to .json manifest file + output_manifest_filepath: Path to .json manifest file to save predictions, will be saved in "pred_field" + grapheme_field: name of the field in manifest_filepath for input grapheme text + pred_field: name of the field in output_manifest_filepath to save predictions + batch_size: int = 32 # Batch size to use for inference + num_workers: int = 0 # Number of workers to use for DataLoader during inference + + Returns: Predictions generated by the model + """ + config = { + "manifest_filepath": manifest_filepath, + "grapheme_field": grapheme_field, + "drop_last": False, + "shuffle": False, + "batch_size": batch_size, + "num_workers": num_workers, + } + + all_preds = self._infer(DictConfig(config)) + with open(manifest_filepath, "r") as f_in: + with open(output_manifest_filepath, 'w', encoding="utf-8") as f_out: + for i, line in tqdm(enumerate(f_in)): + line = json.loads(line) + line[pred_field] = all_preds[i] + f_out.write(json.dumps(line, ensure_ascii=False) + "\n") + + logging.info(f"Predictions saved to {output_manifest_filepath}.") + return all_preds + + @classmethod + def list_available_models(cls) -> 'List[PretrainedModelInfo]': + """ + This method returns a list of pre-trained models which can be instantiated directly from NVIDIA's NGC cloud. + Returns: + List of available pre-trained models. 
+ """ + # recursively walk the subclasses to generate pretrained model info + list_of_models = model_utils.resolve_subclass_pretrained_model_info(cls) + return list_of_models diff --git a/nemo/collections/tts/g2p/models/ctc_g2p.py b/nemo/collections/tts/models/g2p_ctc.py similarity index 99% rename from nemo/collections/tts/g2p/models/ctc_g2p.py rename to nemo/collections/tts/models/g2p_ctc.py index 58123d915c2a..ae21c9b30970 100644 --- a/nemo/collections/tts/g2p/models/ctc_g2p.py +++ b/nemo/collections/tts/models/g2p_ctc.py @@ -25,7 +25,7 @@ from transformers import AutoConfig, AutoModel, AutoTokenizer from nemo.collections.tts.data.g2p_dataset import CTCG2PBPEDataset -from nemo.collections.tts.g2p.models.g2p_model import G2PModel +from nemo.collections.tts.models.base import G2PModel from nemo.core.classes.common import PretrainedModelInfo from nemo.utils import logging diff --git a/nemo/collections/tts/g2p/models/heteronym_classification.py b/nemo/collections/tts/models/g2p_heteronym_classification.py similarity index 100% rename from nemo/collections/tts/g2p/models/heteronym_classification.py rename to nemo/collections/tts/models/g2p_heteronym_classification.py diff --git a/nemo/collections/tts/g2p/models/t5_g2p.py b/nemo/collections/tts/models/g2p_t5.py similarity index 99% rename from nemo/collections/tts/g2p/models/t5_g2p.py rename to nemo/collections/tts/models/g2p_t5.py index b8561a5bdb34..27a5b4218d2d 100644 --- a/nemo/collections/tts/g2p/models/t5_g2p.py +++ b/nemo/collections/tts/models/g2p_t5.py @@ -23,7 +23,7 @@ from nemo.collections.asr.metrics.wer import word_error_rate from nemo.collections.tts.data.g2p_dataset import T5G2PDataset -from nemo.collections.tts.g2p.models.g2p_model import G2PModel +from nemo.collections.tts.models.base import G2PModel from nemo.core.classes.common import PretrainedModelInfo, typecheck from nemo.core.neural_types import LabelsType, LossType, MaskType, NeuralType, TokenIndex from nemo.utils import logging