Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip trim_long_silences in preprocess_wav if webrtcvad not available #376

Merged
6 commits merged into from Jun 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions encoder/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
from encoder.params_data import *
from pathlib import Path
from typing import Optional, Union
from warnings import warn
import numpy as np
import webrtcvad
import librosa
import struct

# webrtcvad is an optional dependency. When it cannot be imported, the module
# still loads and the name is bound to None so callers can test `if webrtcvad:`
# before using it.
try:
    import webrtcvad
except ImportError:
    # Only a failed import is handled here; anything else (KeyboardInterrupt,
    # SystemExit, unrelated bugs) is allowed to propagate.
    warn("Unable to import 'webrtcvad'. This package enables noise removal and is recommended.")
    webrtcvad = None

int16_max = (2 ** 15) - 1


Expand Down Expand Up @@ -35,7 +41,8 @@ def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray],

# Apply the preprocessing: normalize volume and shorten long silences
wav = normalize_volume(wav, audio_norm_target_dBFS, increase_only=True)
wav = trim_long_silences(wav)
if webrtcvad:
wav = trim_long_silences(wav)

return wav

Expand Down
15 changes: 13 additions & 2 deletions encoder_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
import argparse


if __name__ == "__main__":
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
pass
Expand Down Expand Up @@ -37,15 +36,27 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
parser.add_argument("-s", "--skip_existing", action="store_true", help=\
"Whether to skip existing output files with the same name. Useful if this script was "
"interrupted.")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()

# Verify webrtcvad is available, unless the user opted out with --no_trim.
if not args.no_trim:
    try:
        # Imported only to probe availability; the name is not used here.
        import webrtcvad
    except ImportError as err:
        # Catch import failures only, and chain the original error so the
        # underlying cause stays visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# Drop the flag from the namespace once the check is done
# (presumably so it is not forwarded to later consumers of `args` - confirm).
del args.no_trim

# Process the arguments
args.datasets = args.datasets.split(",")
if not hasattr(args, "out_dir"):
args.out_dir = args.datasets_root.joinpath("SV2TTS", "encoder")
assert args.datasets_root.exists()
args.out_dir.mkdir(exist_ok=True, parents=True)

# Preprocess the datasets
print_args(args, parser)
preprocess_func = {
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ tensorflow-cpu==1.15
tensorflow-gpu==1.15
umap-learn
visdom
webrtcvad
librosa>=0.5.1
matplotlib>=2.0.2
numpy>=1.14.0
Expand Down
14 changes: 13 additions & 1 deletion synthesizer_preprocess_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
"interrupted.")
parser.add_argument("--hparams", type=str, default="", help=\
"Hyperparameter overrides as a comma-separated list of name-value pairs")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()

# Process the arguments
if not hasattr(args, "out_dir"):
args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")
Expand All @@ -34,6 +36,16 @@
assert args.datasets_root.exists()
args.out_dir.mkdir(exist_ok=True, parents=True)

# Verify webrtcvad is available, unless the user opted out with --no_trim.
if not args.no_trim:
    try:
        # Imported only to probe availability; the name is not used here.
        import webrtcvad
    except ImportError as err:
        # Catch import failures only, and chain the original error so the
        # underlying cause stays visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# Drop the flag from the namespace once the check is done
# (presumably so it is not forwarded to later consumers of `args` - confirm).
del args.no_trim

# Preprocess the dataset
print_args(args, parser)
args.hparams = hparams.parse(args.hparams)
Expand Down
14 changes: 13 additions & 1 deletion vocoder_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
parser.add_argument("--hparams", default="",
help="Hyperparameter overrides as a comma-separated list of name=value "
"pairs")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()
print_args(args, parser)
modified_hp = hparams.parse(args.hparams)
Expand All @@ -37,5 +39,15 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
if not hasattr(args, "out_dir"):
args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder")

# Verify webrtcvad is available, unless the user opted out with --no_trim.
if not args.no_trim:
    try:
        # Imported only to probe availability; the name is not used here.
        import webrtcvad
    except ImportError as err:
        # Catch import failures only, and chain the original error so the
        # underlying cause stays visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# Drop the flag from the namespace once the check is done; the call below
# only reads in_dir, out_dir and model_dir from `args`.
del args.no_trim

run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)