From 286088e0f03b3084589d1d48f73782ea9ea23c64 Mon Sep 17 00:00:00 2001 From: Ryan Date: Mon, 13 Mar 2023 16:35:25 -0700 Subject: [PATCH 1/5] [TTS] Add script for mapping speaker names to indices Signed-off-by: Ryan --- .../tts/create_speaker_map.py | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 scripts/dataset_processing/tts/create_speaker_map.py diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py new file mode 100644 index 000000000000..f5758d8ea011 --- /dev/null +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -0,0 +1,85 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +This script takes a list of TTS manifests and creates a JSON mapping the input speaker names to +unique indices for multi-speaker TTS training. + +To ensure that speaker names are unique across datasets, it is recommended that you prepend the speaker +names in your manifest with the name of the dataset. + +$ python /scripts/dataset_processing/tts/create_speaker_map.py \ + --manifest_paths=manifest1.json,manifest2.json \ + --speaker_map_path=speakers.json + +Example output: + +{ + "vctk_p225": 0, + "vctk_p226": 1, + "vctk_p227": 2, + ... 
+} + +""" + +import argparse +from pathlib import Path +import json + +from nemo.collections.asr.parts.utils.manifest_utils import read_manifest + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Compute speaker level pitch statistics.", + ) + parser.add_argument( + "--manifest_paths", required=True, type=str, help="Path to training manifests, comma delimited", + ) + parser.add_argument( + "--speaker_map_path", required=True, type=Path, help="Path for output speaker index JSON", + ) + args = parser.parse_args() + return args + + +def main(): + args = get_args() + manifest_path_string = args.manifest_paths + speaker_map_path = args.speaker_map_path + + manifest_paths = [Path(manifest_path) for manifest_path in manifest_path_string.split(",")] + + for manifest_path in manifest_paths: + if not manifest_path.exists(): + raise ValueError(f"Manifest {manifest_path} does not exist.") + + speaker_set = set() + for manifest_path in manifest_paths: + entries = read_manifest(manifest_path) + for entry in entries: + speaker = str(entry["speaker"]) + speaker_set.add(speaker) + + speaker_list = list(speaker_set) + speaker_list.sort() + speaker_index_map = {speaker_list[i]: i for i in range(len(speaker_list))} + + with open(speaker_map_path, 'w', encoding="utf-8") as stats_f: + json.dump(speaker_index_map, stats_f, indent=4) + + +if __name__ == "__main__": + main() From bab0c261b781729a71a5026fd5936d2b39f2f5af Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 27 Apr 2023 20:55:41 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- scripts/dataset_processing/tts/create_speaker_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py index 
f5758d8ea011..de7f0285bd3d 100644 --- a/scripts/dataset_processing/tts/create_speaker_map.py +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -35,8 +35,8 @@ """ import argparse -from pathlib import Path import json +from pathlib import Path from nemo.collections.asr.parts.utils.manifest_utils import read_manifest From e9f53c54d39b730322010f415938a91f0848fad6 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 2 May 2023 15:38:17 -0700 Subject: [PATCH 3/5] [TTS] Add overwrite flag Signed-off-by: Ryan --- scripts/dataset_processing/tts/create_speaker_map.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py index de7f0285bd3d..bf3b99a904b5 100644 --- a/scripts/dataset_processing/tts/create_speaker_map.py +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -51,6 +51,9 @@ def get_args(): parser.add_argument( "--speaker_map_path", required=True, type=Path, help="Path for output speaker index JSON", ) + parser.add_argument( + "--overwrite", nargs="?", const=True, default=False, type=lambda value: value.strip().lower() in ("1", "true", "t", "yes", "y"), help="Whether to overwrite the output speaker file if it exists.", + ) args = parser.parse_args() return args @@ -59,6 +62,7 @@ def main(): args = get_args() manifest_path_string = args.manifest_paths speaker_map_path = args.speaker_map_path + overwrite = args.overwrite manifest_paths = [Path(manifest_path) for manifest_path in manifest_path_string.split(",")] @@ -66,6 +70,12 @@ def main(): if not manifest_path.exists(): raise ValueError(f"Manifest {manifest_path} does not exist.") + if speaker_map_path.exists(): + if overwrite: + print(f"Will overwrite existing speaker path: {speaker_map_path}") + else: + raise ValueError(f"Speaker path already exists: {speaker_map_path}") + speaker_set = set() for manifest_path in manifest_paths: entries = read_manifest(manifest_path) From e15424a86f2cf89894edd602ea7b120febdd753b Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 2 May 2023
15:52:16 -0700 Subject: [PATCH 4/5] [TTS] Fix script description Signed-off-by: Ryan --- scripts/dataset_processing/tts/create_speaker_map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py index bf3b99a904b5..e6c416977265 100644 --- a/scripts/dataset_processing/tts/create_speaker_map.py +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -43,7 +43,8 @@ def get_args(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Compute speaker level pitch statistics.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Create mapping from speaker names to numerical speaker indices.", ) parser.add_argument( "--manifest_paths", required=True, type=str, help="Path to training manifests, comma delimited", From e164d6ff851c0a4781e8ac595e17e58cfa8a14f2 Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 4 May 2023 09:47:10 -0700 Subject: [PATCH 5/5] [TTS] Change manifest paths argument to list Signed-off-by: Ryan --- scripts/dataset_processing/tts/create_speaker_map.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/dataset_processing/tts/create_speaker_map.py b/scripts/dataset_processing/tts/create_speaker_map.py index e6c416977265..027a5c6e3e35 100644 --- a/scripts/dataset_processing/tts/create_speaker_map.py +++ b/scripts/dataset_processing/tts/create_speaker_map.py @@ -20,7 +20,8 @@ names in your manifest with the name of the dataset. 
$ python /scripts/dataset_processing/tts/create_speaker_map.py \ - --manifest_paths=manifest1.json,manifest2.json \ + --manifest_path=manifest1.json \ + --manifest_path=manifest2.json \ --speaker_map_path=speakers.json Example output: @@ -47,7 +48,7 @@ def get_args(): description="Create mapping from speaker names to numerical speaker indices.", ) parser.add_argument( - "--manifest_paths", required=True, type=str, help="Path to training manifests, comma delimited", + "--manifest_path", required=True, type=Path, action="append", help="Path to training manifest(s).", ) parser.add_argument( "--speaker_map_path", required=True, type=Path, help="Path for output speaker index JSON", @@ -61,12 +62,10 @@ def get_args(): def main(): args = get_args() - manifest_path_string = args.manifest_paths + manifest_paths = args.manifest_path speaker_map_path = args.speaker_map_path overwrite = args.overwrite - manifest_paths = [Path(manifest_path) for manifest_path in manifest_path_string.split(",")] - for manifest_path in manifest_paths: if not manifest_path.exists(): raise ValueError(f"Manifest {manifest_path} does not exist.")