Skip to content

Commit

Permalink
Skip trim_long_silences in preprocess_wav if webrtcvad not available (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
blue-fish authored Jun 26, 2020
1 parent 186096e commit 1e16877
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 7 deletions.
11 changes: 9 additions & 2 deletions encoder/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
from encoder.params_data import *
from pathlib import Path
from typing import Optional, Union
from warnings import warn
import numpy as np
import webrtcvad
import librosa
import struct

# webrtcvad is treated as an optional dependency: when it cannot be imported, the module-level
# name is bound to None so that callers can test it for truthiness before using
# voice-activity-based silence trimming.
try:
    import webrtcvad
except Exception:
    # Deliberately broader than ImportError so the best-effort fallback is kept for any failure
    # raised while importing the package, but (unlike a bare except) KeyboardInterrupt and
    # SystemExit still propagate.
    warn("Unable to import 'webrtcvad'. This package enables noise removal and is recommended.")
    webrtcvad = None

int16_max = (2 ** 15) - 1


Expand Down Expand Up @@ -35,7 +41,8 @@ def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray],

# Apply the preprocessing: normalize volume and shorten long silences
wav = normalize_volume(wav, audio_norm_target_dBFS, increase_only=True)
wav = trim_long_silences(wav)
if webrtcvad:
wav = trim_long_silences(wav)

return wav

Expand Down
15 changes: 13 additions & 2 deletions encoder_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from pathlib import Path
import argparse


if __name__ == "__main__":
class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
pass
Expand Down Expand Up @@ -37,15 +36,27 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
parser.add_argument("-s", "--skip_existing", action="store_true", help=\
"Whether to skip existing output files with the same name. Useful if this script was "
"interrupted.")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()

# Verify webrtcvad is available. Unless the user opted out with --no_trim, abort before any
# preprocessing starts and tell them how to proceed.
if not args.no_trim:
    try:
        import webrtcvad
    except Exception as err:
        # Not a bare except: KeyboardInterrupt / SystemExit must not be rewritten into a
        # "package not found" error. The original failure is chained so its cause stays
        # visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# The flag is only consumed by the check above. NOTE(review): presumably removed so it is not
# forwarded with the remaining arguments -- confirm against the code that consumes args.
del args.no_trim

# Process the arguments
args.datasets = args.datasets.split(",")
if not hasattr(args, "out_dir"):
args.out_dir = args.datasets_root.joinpath("SV2TTS", "encoder")
assert args.datasets_root.exists()
args.out_dir.mkdir(exist_ok=True, parents=True)

# Preprocess the datasets
print_args(args, parser)
preprocess_func = {
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ tensorflow-cpu==1.15
tensorflow-gpu==1.15
umap-learn
visdom
webrtcvad
librosa>=0.5.1
matplotlib>=2.0.2
numpy>=1.14.0
Expand Down
14 changes: 13 additions & 1 deletion synthesizer_preprocess_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,10 @@
"interrupted.")
parser.add_argument("--hparams", type=str, default="", help=\
"Hyperparameter overrides as a comma-separated list of name-value pairs")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()

# Process the arguments
if not hasattr(args, "out_dir"):
args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")
Expand All @@ -34,6 +36,16 @@
assert args.datasets_root.exists()
args.out_dir.mkdir(exist_ok=True, parents=True)

# Verify webrtcvad is available. Unless the user opted out with --no_trim, abort before any
# preprocessing starts and tell them how to proceed.
if not args.no_trim:
    try:
        import webrtcvad
    except Exception as err:
        # Not a bare except: KeyboardInterrupt / SystemExit must not be rewritten into a
        # "package not found" error. The original failure is chained so its cause stays
        # visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# The flag is only consumed by the check above. NOTE(review): presumably removed so it is not
# forwarded with the remaining arguments -- confirm against the code that consumes args.
del args.no_trim

# Preprocess the dataset
print_args(args, parser)
args.hparams = hparams.parse(args.hparams)
Expand Down
14 changes: 13 additions & 1 deletion vocoder_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
parser.add_argument("--hparams", default="",
help="Hyperparameter overrides as a comma-separated list of name=value "
"pairs")
parser.add_argument("--no_trim", action="store_true", help=\
"Preprocess audio without trimming silences (not recommended).")
args = parser.parse_args()
print_args(args, parser)
modified_hp = hparams.parse(args.hparams)
Expand All @@ -37,5 +39,15 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
if not hasattr(args, "out_dir"):
args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder")

# Verify webrtcvad is available. Unless the user opted out with --no_trim, abort before
# synthesis is started and tell them how to proceed.
if not args.no_trim:
    try:
        import webrtcvad
    except Exception as err:
        # Not a bare except: KeyboardInterrupt / SystemExit must not be rewritten into a
        # "package not found" error. The original failure is chained so its cause stays
        # visible in the traceback.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err
# The flag is only consumed by the check above; drop it from the parsed namespace afterwards.
del args.no_trim

run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)

0 comments on commit 1e16877

Please sign in to comment.