Skip to content

Commit

Permalink
use PySoundFile instead of librosa.load (audioread)
Browse files Browse the repository at this point in the history
  • Loading branch information
albertz committed Feb 22, 2018
1 parent e5fbba2 commit 2f9a477
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
20 changes: 15 additions & 5 deletions GeneratingDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,12 @@ class _NltkCorpusReaderDataset(CachedDataset2):


class ExtractAudioFeatures:
"""
Currently uses librosa to extract MFCC features.
We could also use python_speech_features.
We could also add support for other feature types, e.g. directly extracting log-filterbanks.
"""

def __init__(self,
window_len=0.025, step_len=0.010,
num_feature_filters=40, with_delta=False, norm_mean=None, norm_std_dev=None,
Expand Down Expand Up @@ -1498,9 +1504,6 @@ def __init__(self, path, prefix, bpe, audio, partition_epoch=None, fixed_random_
self.prefix = prefix
assert prefix in ["train", "dev", "eval"]
assert os.path.exists(path + "/train-clean-100")
import Util
Util.monkeyfix_glib()
Util.monkeypatch_audioread()
self.bpe = BytePairEncoding(**bpe)
self.labels = self.bpe.labels
self._fixed_random_seed = fixed_random_seed
Expand Down Expand Up @@ -1593,13 +1596,20 @@ def _collect_single_seq(self, seq_idx):
:param int seq_idx:
:rtype: DatasetSeq
"""
# Don't use librosa.load: it internally uses audioread, which would use Gstreamer as a backend,
# and Gstreamer has multiple issues:
# https://github.com/beetbox/audioread/issues/62
# https://github.com/beetbox/audioread/issues/63
# Instead, use PySoundFile, which is also faster. See here for discussions:
# https://github.com/beetbox/audioread/issues/64
# https://github.com/librosa/librosa/issues/681
import os
import librosa
import soundfile # pip install pysoundfile
subdir, speaker_id, chapter_id, seq_id = self._reference_seq_order[self._get_ref_seq_idx(seq_idx)]
audio_fn = "%(p)s/%(sd)s/%(sp)i/%(ch)i/%(sp)i-%(ch)i-%(i)04i.flac" % {
"p": self.path, "sd": subdir, "sp": speaker_id, "ch": chapter_id, "i": seq_id}
assert os.path.exists(audio_fn)
audio, sample_rate = librosa.load(audio_fn, sr=None)
audio, sample_rate = soundfile.read(audio_fn)
features = self.feature_extractor.get_audio_features(audio=audio, sample_rate=sample_rate)
targets_txt = self.transs[(subdir, speaker_id, chapter_id, seq_id)]
targets = numpy.array(self.bpe.get_seq(targets_txt), dtype="int32")
Expand Down
12 changes: 11 additions & 1 deletion Util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2570,8 +2570,8 @@ def monkeyfix_glib():
"""
Fixes some bugs which cause SIGINT to not work.
This is used by audioread, and indirectly by librosa for loading audio.
https://stackoverflow.com/questions/16410852/
See also :func:`monkeypatch_audioread`.
"""
try:
import gi
Expand All @@ -2593,6 +2593,16 @@ def monkeypatch_audioread():
audioread does not behave optimally in some cases.
E.g. each call to _ca_available() takes quite a long time because of the ctypes.util.find_library usage.
We will patch this.
However, the recommendation is to not use audioread (librosa.load) at all.
Currently, audioread uses Gstreamer as its default backend (on Linux).
Gstreamer has multiple issues. See also :func:`monkeyfix_glib`, and here for discussion:
https://github.com/beetbox/audioread/issues/62
https://github.com/beetbox/audioread/issues/63
Instead, use PySoundFile, which is also faster. See here for discussions:
https://github.com/beetbox/audioread/issues/64
https://github.com/librosa/librosa/issues/681
"""
try:
import audioread
Expand Down

0 comments on commit 2f9a477

Please sign in to comment.