Skip to content

Commit

Permalink
[TTS/TN/G2P] Remove Text Processing from NeMo, move G2P to TTS (NVIDIA#5982)
Browse files Browse the repository at this point in the history

* remove TN

Signed-off-by: ekmb <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix imports

Signed-off-by: ekmb <[email protected]>

* fix import

Signed-off-by: ekmb <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add missing init

Signed-off-by: ekmb <[email protected]>

* fix import

Signed-off-by: ekmb <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* rename unit test

Signed-off-by: ekmb <[email protected]>

* fix import

Signed-off-by: ekmb <[email protected]>

* fix modules test

Signed-off-by: ekmb <[email protected]>

* fix imports

Signed-off-by: ekmb <[email protected]>

* remove whitelist from config

Signed-off-by: ekmb <[email protected]>

* delete wordid file

Signed-off-by: ekmb <[email protected]>

* remove pynini_install from tutorials

Signed-off-by: ekmb <[email protected]>

* update requirements

Signed-off-by: ekmb <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add support warning

Signed-off-by: ekmb <[email protected]>

* review

Signed-off-by: ekmb <[email protected]>

---------

Signed-off-by: ekmb <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
2 people authored and titu1994 committed Mar 24, 2023
1 parent 7667324 commit dd98487
Show file tree
Hide file tree
Showing 1,137 changed files with 194 additions and 95,825 deletions.
7 changes: 1 addition & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ WORKDIR /tmp/nemo
COPY requirements .
RUN for f in $(ls requirements*.txt); do pip3 install --disable-pip-version-check --no-cache-dir -r $f; done

# install pynini
COPY nemo_text_processing/install_pynini.sh /tmp/nemo/
RUN /bin/bash /tmp/nemo/install_pynini.sh

# install k2, skip if installation fails
COPY scripts /tmp/nemo/scripts/
RUN /bin/bash /tmp/nemo/scripts/speech_recognition/k2/setup.sh || exit 0
Expand All @@ -81,8 +77,7 @@ RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"

# Check install
RUN python -c "import nemo.collections.nlp as nemo_nlp" && \
python -c "import nemo.collections.tts as nemo_tts" && \
python -c "import nemo_text_processing.text_normalization as text_normalization"
python -c "import nemo.collections.tts as nemo_tts"

# TODO: Update to newer numba 0.56.0RC1 for 22.03 container if possible
# install pinned numba version
Expand Down
92 changes: 3 additions & 89 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -102,92 +102,6 @@ pipeline {
}
}


stage('L0: TN/ITN Tests CPU') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('En TN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize.py --text="1" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22'
}
}
stage('En ITN grammars') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/inverse_text_normalization/inverse_normalize.py --language en --text="twenty" --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22'
}
}
stage('Test En non-deterministic TN & Run all En TN/ITN tests (restore grammars from cache)') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/text_normalization/normalize_with_audio.py --text "\$.01" --n_tagged 2 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22'
sh 'CUDA_VISIBLE_DEVICES="" pytest tests/nemo_text_processing/en/ -m "not pleasefixme" --cpu --tn_cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22'
}
}
stage('Test En Hybrid TN') {
steps {
sh 'CUDA_VISIBLE_DEVICES="" python nemo_text_processing/hybrid/wfst_lm_rescoring.py --data /home/TestData/nlp/text_norm/hybrid_tn/test.txt --regenerate_pkl --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22 | grep "all_correct: True" || exit 1'
}
}
}
}

stage('L2: NeMo text processing') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
parallel {
stage('L2: Eng TN') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_norm/output/ --grammars=tn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22 --language=en && ls -R /home/TestData/nlp/text_norm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/text_normalization/ && python normalize.py --input_file=/home/TestData/nlp/text_norm/ci/test.txt --input_case="lower_cased" --language=en --output_file=/home/TestData/nlp/text_norm/output/test.pynini.txt --verbose'
sh 'cat /home/TestData/nlp/text_norm/output/test.pynini.txt'
sh 'cmp --silent /home/TestData/nlp/text_norm/output/test.pynini.txt /home/TestData/nlp/text_norm/ci/test_goal_py_05-25.txt || exit 1'
sh 'rm -rf /home/TestData/nlp/text_norm/output/*'
}
}

stage('L2: Eng ITN export') {
steps {
sh 'cd tools/text_processing_deployment && python pynini_export.py --output=/home/TestData/nlp/text_denorm/output/ --grammars=itn_grammars --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22 --language=en && ls -R /home/TestData/nlp/text_denorm/output/ && echo ".far files created "|| exit 1'
sh 'cd nemo_text_processing/inverse_text_normalization/ && python inverse_normalize.py --input_file=/home/TestData/nlp/text_denorm/ci/test.txt --language=en --output_file=/home/TestData/nlp/text_denorm/output/test.pynini.txt --verbose'
sh 'cmp --silent /home/TestData/nlp/text_denorm/output/test.pynini.txt /home/TestData/nlp/text_denorm/ci/test_goal_py.txt || exit 1'
sh 'rm -rf /home/TestData/nlp/text_denorm/output/*'
}
}
stage('L2: TN with Audio (audio and raw text)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22 --text "The total amounts to \\$4.76." \
--audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /tmp/out_raw.txt 2>&1 && \
cmp --silent /tmp/out_raw.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
}
}
stage('L2: TN with Audio (audio and text file)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22 --text /home/TestData/nlp/text_norm/audio_based/text.txt \
--audio_data /home/TestData/nlp/text_norm/audio_based/audio.wav | tail -n2 | head -n1 > /tmp/out_file.txt 2>&1 && \
cmp --silent /tmp/out_file.txt /home/TestData/nlp/text_norm/audio_based/result.txt || exit 1'
}
}
stage('L2: TN with Audio (manifest)') {
steps {
sh 'cd nemo_text_processing/text_normalization && \
python normalize_with_audio.py --language=en --audio_data /home/TestData/nlp/text_norm/audio_based/manifest.json --n_tagged=120 --cache_dir /home/TestData/nlp/text_norm/ci/grammars/11-16-22'
}
}
}
}

stage('L2: ASR dev run') {
when {
anyOf {
Expand Down Expand Up @@ -1073,7 +987,7 @@ pipeline {
parallel {
stage('G2P Conformer training, evaluation and inference') {
steps {
sh 'cd examples/text_processing/g2p && \
sh 'cd examples/tts/g2p && \
TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_CONFORMER=output_ctc_${TIME} && \
python g2p_train_and_evaluate.py \
train_manifest=/home/TestData/g2p/g2p.json \
Expand All @@ -1097,7 +1011,7 @@ pipeline {
}
stage('ByT5G2P training, evaluation and inference') {
steps {
sh 'TRANSFORMERS_OFFLINE=0 && cd examples/text_processing/g2p && \
sh 'TRANSFORMERS_OFFLINE=0 && cd examples/tts/g2p && \
TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR_T5=output_byt5_${TIME} && \
python g2p_train_and_evaluate.py \
train_manifest=/home/TestData/g2p/g2p.json \
Expand All @@ -1119,7 +1033,7 @@ pipeline {
}
stage('HeteronymClassificationModel training, evaluation and inference') {
steps {
sh 'cd examples/text_processing/g2p && \
sh 'cd examples/tts/g2p && \
TIME=`date +"%Y-%m-%d-%T"` && OUTPUT_DIR=output_${TIME} && \
python heteronym_classification_train_and_evaluate.py \
train_manifest=/home/TestData/g2p/manifest.json \
Expand Down
6 changes: 1 addition & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -243,11 +243,7 @@ Transformer Engine enables FP8 training on NVIDIA Hopper GPUs.

NeMo Text Processing
~~~~~~~~~~~~~~~~~~~~
NeMo Text Processing, specifically (Inverse) Text Normalization, requires `Pynini <https://pypi.org/project/pynini/>`_ to be installed.

.. code-block:: bash
bash NeMo/nemo_text_processing/install_pynini.sh
NeMo Text Processing, specifically (Inverse) Text Normalization, is now a separate repository `https://github.com/NVIDIA/NeMo-text-processing <https://github.com/NVIDIA/NeMo-text-processing>`_.

Docker containers:
~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion docs/source/tts/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,4 +96,4 @@ Dataset Processing Classes

.. autoclass:: nemo.collections.tts.torch.data.VocoderDataset
:show-inheritance:
:members:
:members:
2 changes: 1 addition & 1 deletion docs/source/tts/checkpoints.rst
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,4 @@ End2End models
.. csv-table::
:file: data/ngc_models_e2e.csv
:align: left
:header-rows: 1
:header-rows: 1
5 changes: 2 additions & 3 deletions docs/source/tts/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ Text normalization (TN) converts text from written form into its verbalized form
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: en
input_case: cased
whitelist: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"
text_normalizer_call_kwargs:
verbose: false
Expand All @@ -118,7 +117,7 @@ Tokenization converts input text string to a list of integer tokens. It may pad
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.EnglishG2p
_target_: nemo.collections.tts.g2p.modules.EnglishG2p
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}
phoneme_probability: 0.5
Expand Down Expand Up @@ -260,4 +259,4 @@ Fine-tuning via a Pytorch Lightning checkpoint
trainer.devices=-1 \
trainer.accelerator='gpu' \
trainer.max_epochs=50 \
+init_from_ptl_ckpt="<name of pytorch lightning checkpoint>"
+init_from_ptl_ckpt="<name of pytorch lightning checkpoint>"
4 changes: 1 addition & 3 deletions docs/source/tts/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,7 @@ LJSpeech
.. code-block:: shell-session
$ python scripts/dataset_processing/tts/ljspeech/get_data.py \
--data-root <your_local_dataset_root> \
--whitelist-path <your_local_whitelist_filepath> \
or default nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv
--data-root <your_local_dataset_root>
$ python scripts/dataset_processing/tts/extract_sup_data.py \
--config-path ljspeech/ds_conf \
Expand Down
4 changes: 1 addition & 3 deletions examples/tts/conf/aligner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ window: hann

phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.10"
heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"

model:
symbols_embedding_dim: 384
Expand All @@ -41,7 +40,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: en
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand All @@ -56,7 +54,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.EnglishG2p
_target_: nemo.collections.tts.g2p.modules.EnglishG2p
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}

Expand Down
3 changes: 0 additions & 3 deletions examples/tts/conf/de/fastpitch_align_22050_grapheme.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ lowfreq: 0
highfreq: null
window: hann

whitelist_path: "nemo_text_processing/text_normalization/de/data/whitelist.tsv"

model:
learn_alignment: true
bin_loss_warmup_epochs: 100
Expand Down Expand Up @@ -58,7 +56,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: de
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand Down
4 changes: 1 addition & 3 deletions examples/tts/conf/de/fastpitch_align_22050_mix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ window: hann

phoneme_dict_path: "scripts/tts_dataset_files/de/de_nv230119.dict"
heteronyms_path: "scripts/tts_dataset_files/de/de_nv230119.heteronyms"
whitelist_path: "nemo_text_processing/text_normalization/de/data/whitelist.tsv"

model:
learn_alignment: true
Expand Down Expand Up @@ -60,7 +59,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: de
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand All @@ -74,7 +72,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.IPAG2P
_target_: nemo.collections.tts.g2p.modules.IPAG2P
locale: 'de-DE'
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}
Expand Down
3 changes: 0 additions & 3 deletions examples/tts/conf/de/fastpitch_align_44100_grapheme.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ lowfreq: 0
highfreq: null
window: hann

whitelist_path: "nemo_text_processing/text_normalization/de/data/whitelist.tsv"

model:
learn_alignment: true
bin_loss_warmup_epochs: 100
Expand Down Expand Up @@ -58,7 +56,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: de
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand Down
3 changes: 0 additions & 3 deletions examples/tts/conf/de/fastpitch_align_44100_phoneme.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ lowfreq: 0
highfreq: null
window: hann

whitelist_path: "nemo_text_processing/text_normalization/de/data/whitelist.tsv"

model:
learn_alignment: true
bin_loss_warmup_epochs: 100
Expand Down Expand Up @@ -58,7 +56,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: de
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand Down
2 changes: 1 addition & 1 deletion examples/tts/conf/es/fastpitch_align_44100_ipa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.IPAG2P
_target_: nemo.collections.tts.g2p.modules.IPAG2P
locale: es-ES
phoneme_dict: ${phoneme_dict_path}
phoneme_probability: 0.5
Expand Down
2 changes: 1 addition & 1 deletion examples/tts/conf/es/fastpitch_align_44100_ipa_multi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.IPAG2P
_target_: nemo.collections.tts.g2p.modules.IPAG2P
locale: es-ES
phoneme_dict: ${phoneme_dict_path}
phoneme_probability: 0.5
Expand Down
4 changes: 1 addition & 3 deletions examples/tts/conf/fastpitch_align_44100.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ window: hann

phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.10"
heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"

model:
learn_alignment: true
Expand Down Expand Up @@ -59,7 +58,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: en
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand All @@ -74,7 +72,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.EnglishG2p
_target_: nemo.collections.tts.g2p.modules.EnglishG2p
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}
phoneme_probability: 0.5
Expand Down
4 changes: 1 addition & 3 deletions examples/tts/conf/fastpitch_align_ipa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ window: hann

phoneme_dict_path: "scripts/tts_dataset_files/ipa_cmudict-0.7b_nv22.10.txt"
heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"

model:
learn_alignment: true
Expand Down Expand Up @@ -60,7 +59,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: en
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand All @@ -73,7 +71,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.IPAG2P
_target_: nemo.collections.tts.g2p.modules.IPAG2P
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}
phoneme_probability: 0.8
Expand Down
4 changes: 1 addition & 3 deletions examples/tts/conf/fastpitch_align_v1.05.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ window: hann

phoneme_dict_path: "scripts/tts_dataset_files/cmudict-0.7b_nv22.10"
heteronyms_path: "scripts/tts_dataset_files/heteronyms-052722"
whitelist_path: "nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv"

model:
learn_alignment: true
Expand Down Expand Up @@ -60,7 +59,6 @@ model:
_target_: nemo_text_processing.text_normalization.normalize.Normalizer
lang: en
input_case: cased
whitelist: ${whitelist_path}

text_normalizer_call_kwargs:
verbose: false
Expand All @@ -75,7 +73,7 @@ model:
apostrophe: true
pad_with_space: true
g2p:
_target_: nemo_text_processing.g2p.modules.EnglishG2p
_target_: nemo.collections.tts.g2p.modules.EnglishG2p
phoneme_dict: ${phoneme_dict_path}
heteronyms: ${heteronyms_path}
phoneme_probability: 0.5
Expand Down
Loading

0 comments on commit dd98487

Please sign in to comment.