Skip to content

Commit

Permalink
Fixes for #1152 #1153 and #1154 (#1156)
Browse files Browse the repository at this point in the history
* Tutorial materials in main readme page

* Fixes for #1152 #1153 and #1154

* Fix isinstance use in Python 3.7-3.9
  • Loading branch information
pzelasko authored Sep 18, 2023
1 parent 567ba29 commit 3dde48d
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 15 deletions.
14 changes: 10 additions & 4 deletions lhotse/audio/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,7 +872,7 @@ def read_opus_ffmpeg(
if duration is not None:
cmd += f" -t {duration}"
# Add the input specifier after offset and duration.
cmd += f" -i {path}"
cmd += f" -i '{path}'"
# Optionally resample the output.
if force_opus_sampling_rate is not None:
cmd += f" -ar {force_opus_sampling_rate}"
Expand Down Expand Up @@ -1028,22 +1028,28 @@ def read_audio(


def info(
path: Pathlike,
path: Union[Pathlike, BytesIO],
force_opus_sampling_rate: Optional[int] = None,
force_read_audio: bool = False,
) -> LibsndfileCompatibleAudioInfo:

is_path = isinstance(path, (Path, str))

if force_read_audio:
# This is a reliable fallback for situations when the user knows that audio files do not
# have duration metadata in their headers.
# We will use "audioread" backend that spawns an ffmpeg process, reads the audio,
# and computes the duration.
assert (
is_path
), f"info(obj, force_read_audio=True) is not supported for object of type: {type(path)}"
return audioread_info(str(path))

if path.suffix.lower() == ".opus":
if is_path and Path(path).suffix.lower() == ".opus":
# We handle OPUS as a special case because we might need to force a certain sampling rate.
return opus_info(path, force_opus_sampling_rate=force_opus_sampling_rate)

elif path.suffix.lower() == ".sph":
if is_path and Path(path).suffix.lower() == ".sph":
# We handle SPHERE as another special case because some old codecs (i.e. "shorten" codec)
# can be handled by neither pysoundfile nor pyaudioread.
return sph_info(path)
Expand Down
36 changes: 30 additions & 6 deletions lhotse/bin/modes/manipulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,22 @@ def copy_feats_worker(
@click.option(
"--pad/--no-pad",
default=True,
help="Whether to pad the split output idx with zeros (e.g. 01, 02, .., 10).",
help="Whether to pad the split output idx with zeros (e.g. 00, 01, 02, .., 10).",
)
@click.option(
"-i",
"--start-idx",
type=int,
default=0,
help="Count splits starting from this index.",
)
def split(
num_splits: int, manifest: Pathlike, output_dir: Pathlike, shuffle: bool, pad: bool
num_splits: int,
manifest: Pathlike,
output_dir: Pathlike,
shuffle: bool,
pad: bool,
start_idx: int,
):
"""
Load MANIFEST, split it into NUM_SPLITS equal parts and save as separate manifests in OUTPUT_DIR.
Expand All @@ -161,8 +173,8 @@ def split(
parts = any_set.split(num_splits=num_splits, shuffle=shuffle)
output_dir.mkdir(parents=True, exist_ok=True)
num_digits = len(str(num_splits))
for idx, part in enumerate(parts):
idx = f"{idx + 1}".zfill(num_digits) if pad else str(idx + 1)
for idx, part in enumerate(parts, start=start_idx):
idx = f"{idx}".zfill(num_digits) if pad else str(idx)
part.to_file((output_dir / manifest.stem).with_suffix(f".{idx}{suffix}"))


Expand All @@ -172,7 +184,16 @@ def split(
)
@click.argument("output_dir", type=click.Path(allow_dash=True))
@click.argument("chunk_size", type=int)
def split_lazy(manifest: Pathlike, output_dir: Pathlike, chunk_size: int):
@click.option(
"-i",
"--start-idx",
type=int,
default=0,
help="Count splits starting from this index.",
)
def split_lazy(
manifest: Pathlike, output_dir: Pathlike, chunk_size: int, start_idx: int
):
"""
Load MANIFEST (lazily if in JSONL format) and split it into parts,
each with CHUNK_SIZE items.
Expand All @@ -187,7 +208,10 @@ def split_lazy(manifest: Pathlike, output_dir: Pathlike, chunk_size: int):
manifest = Path(manifest)
any_set = load_manifest_lazy_or_eager(manifest)
any_set.split_lazy(
output_dir=output_dir, chunk_size=chunk_size, prefix=manifest.stem
output_dir=output_dir,
chunk_size=chunk_size,
prefix=manifest.stem,
start_idx=start_idx,
)


Expand Down
13 changes: 11 additions & 2 deletions lhotse/cut/set.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,10 @@ def total_duration_(segments: List[TimeSpan]) -> float:
print(tabulate(speaker_stats, headers="firstrow", tablefmt="fancy_grid"))

def split(
self, num_splits: int, shuffle: bool = False, drop_last: bool = False
self,
num_splits: int,
shuffle: bool = False,
drop_last: bool = False,
) -> List["CutSet"]:
"""
Split the :class:`~lhotse.CutSet` into ``num_splits`` pieces of equal size.
Expand All @@ -1000,7 +1003,10 @@ def split(
return [
CutSet.from_cuts(subset)
for subset in split_sequence(
self, num_splits=num_splits, shuffle=shuffle, drop_last=drop_last
self,
num_splits=num_splits,
shuffle=shuffle,
drop_last=drop_last,
)
]

Expand All @@ -1010,6 +1016,7 @@ def split_lazy(
chunk_size: int,
prefix: str = "",
num_digits: int = 8,
start_idx: int = 0,
) -> List["CutSet"]:
"""
Splits a manifest (either lazily or eagerly opened) into chunks, each
Expand All @@ -1027,6 +1034,7 @@ def split_lazy(
:param chunk_size: the number of items in each chunk.
:param prefix: the prefix of each manifest.
:param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
:param start_idx: The split index to start counting from (default is ``0``).
:return: a list of lazily opened chunk manifests.
"""
return split_manifest_lazy(
Expand All @@ -1035,6 +1043,7 @@ def split_lazy(
chunk_size=chunk_size,
prefix=prefix,
num_digits=num_digits,
start_idx=start_idx,
)

def subset(
Expand Down
4 changes: 3 additions & 1 deletion lhotse/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def split_manifest_lazy(
chunk_size: int,
prefix: str = "",
num_digits: int = 8,
start_idx: int = 0,
) -> List:
"""
Splits a manifest (either lazily or eagerly opened) into chunks, each
Expand All @@ -297,6 +298,7 @@ def split_manifest_lazy(
:param chunk_size: the number of items in each chunk.
:param prefix: the prefix of each manifest.
:param num_digits: the width of ``split_idx``, which will be left padded with zeros to achieve it.
:param start_idx: The split index to start counting from (default is ``0``).
:return: a list of lazily opened chunk manifests.
"""
from lhotse.serialization import SequentialJsonlWriter
Expand All @@ -308,7 +310,7 @@ def split_manifest_lazy(
prefix = "split"

items = iter(it)
split_idx = 0
split_idx = start_idx
splits = []
while True:
try:
Expand Down
34 changes: 32 additions & 2 deletions test/audio/test_audio_reads.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from tempfile import NamedTemporaryFile
import shutil
from io import BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory

import numpy as np
import pytest
Expand All @@ -7,7 +10,12 @@

import lhotse
from lhotse import AudioSource, Recording
from lhotse.audio.backend import read_opus_ffmpeg, read_opus_torchaudio, torchaudio_load
from lhotse.audio.backend import (
info,
read_opus_ffmpeg,
read_opus_torchaudio,
torchaudio_load,
)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -78,6 +86,14 @@ def test_resample_opus():
r1.load_audio()


def test_opus_name_with_whitespaces():
    """Loading an OPUS recording whose file name contains a space must not raise."""
    fixture = "test/fixtures/mono_c0.opus"
    with TemporaryDirectory() as tmp_dir:
        # Copy the fixture under a name with whitespace so the path itself
        # is what gets exercised.
        spaced_path = Path(tmp_dir) / "white space.opus"
        shutil.copy(fixture, spaced_path)
        recording = Recording.from_file(spaced_path)
        recording.load_audio()  # does not raise


@pytest.mark.parametrize(
"path",
[
Expand Down Expand Up @@ -223,3 +239,17 @@ def test_audio_loading_optimization_returns_expected_num_samples():
cut.duration = reduced_num_samples / cut.sampling_rate
audio = cut.load_audio()
assert audio.shape[1] == reduced_num_samples


def test_audio_info_from_bytes_io():
    """Check that ``info`` reads audio metadata from an in-memory file-like object.

    Also checks that ``force_read_audio=True`` is rejected with an
    ``AssertionError`` when the input is a file-like object rather than a path.
    """
    # Read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("test/fixtures/mono_c0.wav", "rb") as f:
        audio_filelike = BytesIO(f.read())

    meta = info(audio_filelike)
    assert meta.duration == 0.5
    assert meta.frames == 4000
    assert meta.samplerate == 8000
    assert meta.channels == 1

    with pytest.raises(AssertionError):
        # force_read_audio won't work with a filelike object.
        # The call is not wrapped in ``assert``: a failing outer assert would
        # itself raise AssertionError and satisfy this block for the wrong reason.
        info(audio_filelike, force_read_audio=True)

0 comments on commit 3dde48d

Please sign in to comment.