Skip trim_long_silences in preprocess_wav if webrtcvad not available (#376)
CorentinJ · Jun 26, 2020 · 1e16877 · 1e16877
1 parent 186096e
commit 1e16877
Show file tree

Hide file tree

Showing 5 changed files with 48 additions and 7 deletions.
diff --git a/encoder/audio.py b/encoder/audio.py
@@ -2,11 +2,17 @@
 from encoder.params_data import *
 from pathlib import Path
 from typing import Optional, Union
+from warnings import warn
 import numpy as np
-import webrtcvad
 import librosa
 import struct
 
+try:
+    import webrtcvad
+except:
+    warn("Unable to import 'webrtcvad'. This package enables noise removal and is recommended.")
+    webrtcvad=None
+
 int16_max = (2 ** 15) - 1
 
 
@@ -35,7 +41,8 @@ def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray],
 
     # Apply the preprocessing: normalize volume and shorten long silences 
     wav = normalize_volume(wav, audio_norm_target_dBFS, increase_only=True)
-    wav = trim_long_silences(wav)
+    if webrtcvad:
+        wav = trim_long_silences(wav)
 
     return wav
 

diff --git a/encoder_preprocess.py b/encoder_preprocess.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 import argparse
 
-
 if __name__ == "__main__":
     class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
         pass
@@ -37,15 +36,27 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
     parser.add_argument("-s", "--skip_existing", action="store_true", help=\
         "Whether to skip existing output files with the same name. Useful if this script was "
         "interrupted.")
+    parser.add_argument("--no_trim", action="store_true", help=\
+        "Preprocess audio without trimming silences (not recommended).")
     args = parser.parse_args()
 
+    # Verify webrtcvad is available
+    if not args.no_trim:
+        try:
+            import webrtcvad
+        except:
+            raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
+                "noise removal and is recommended. Please install and try again. If installation fails, "
+                "use --no_trim to disable this error message.")
+    del args.no_trim
+
     # Process the arguments
     args.datasets = args.datasets.split(",")
     if not hasattr(args, "out_dir"):
         args.out_dir = args.datasets_root.joinpath("SV2TTS", "encoder")
     assert args.datasets_root.exists()
     args.out_dir.mkdir(exist_ok=True, parents=True)
-    
+
     # Preprocess the datasets
     print_args(args, parser)
     preprocess_func = {

diff --git a/requirements.txt b/requirements.txt
@@ -5,7 +5,6 @@ tensorflow-cpu==1.15
 tensorflow-gpu==1.15
 umap-learn
 visdom
-webrtcvad
 librosa>=0.5.1
 matplotlib>=2.0.2
 numpy>=1.14.0

diff --git a/synthesizer_preprocess_audio.py b/synthesizer_preprocess_audio.py
@@ -24,8 +24,10 @@
         "interrupted.")
     parser.add_argument("--hparams", type=str, default="", help=\
         "Hyperparameter overrides as a comma-separated list of name-value pairs")
+    parser.add_argument("--no_trim", action="store_true", help=\
+        "Preprocess audio without trimming silences (not recommended).")
     args = parser.parse_args()
-    
+
     # Process the arguments
     if not hasattr(args, "out_dir"):
         args.out_dir = args.datasets_root.joinpath("SV2TTS", "synthesizer")
@@ -34,6 +36,16 @@
     assert args.datasets_root.exists()
     args.out_dir.mkdir(exist_ok=True, parents=True)
 
+    # Verify webrtcvad is available
+    if not args.no_trim:
+        try:
+            import webrtcvad
+        except:
+            raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
+                "noise removal and is recommended. Please install and try again. If installation fails, "
+                "use --no_trim to disable this error message.")
+    del args.no_trim
+
     # Preprocess the dataset
     print_args(args, parser)
     args.hparams = hparams.parse(args.hparams)

diff --git a/vocoder_preprocess.py b/vocoder_preprocess.py
@@ -28,6 +28,8 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
     parser.add_argument("--hparams", default="",
                         help="Hyperparameter overrides as a comma-separated list of name=value "
                              "pairs")
+    parser.add_argument("--no_trim", action="store_true", help=\
+        "Preprocess audio without trimming silences (not recommended).")
     args = parser.parse_args()
     print_args(args, parser)
     modified_hp = hparams.parse(args.hparams)
@@ -37,5 +39,15 @@ class MyFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptio
     if not hasattr(args, "out_dir"):
         args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder")
 
+    # Verify webrtcvad is available
+    if not args.no_trim:
+        try:
+            import webrtcvad
+        except:
+            raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
+                "noise removal and is recommended. Please install and try again. If installation fails, "
+                "use --no_trim to disable this error message.")
+    del args.no_trim
+
     run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)
-    
+