Skip to content

Commit

Permalink
Remove some deprecated methods (#900)
Browse files Browse the repository at this point in the history
Removes:
* `Cut.compute_and_store_recording()`
* `CutSet.compute_and_store_recordings()`
* `CutSet.mix_same_recording_channels()`
  • Loading branch information
desh2608 authored Nov 22, 2022
2 parents 4defcff + e59e140 commit 7909704
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 106 deletions.
21 changes: 0 additions & 21 deletions lhotse/cut/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,27 +576,6 @@ def index_supervisions(
)
return indexed

@deprecated(
    "Cut.compute_and_store_recording will be removed in a future release. Please use save_audio() instead."
)
def compute_and_store_recording(
    self,
    storage_path: Pathlike,
    augment_fn: Optional[AugmentFn] = None,
) -> "Cut":
    """
    Deprecated alias of ``save_audio()``: both arguments are forwarded unchanged.

    :param storage_path: The path to location where the audio recording will be stored.
    :param augment_fn: an optional callable used for audio augmentation.
        Avoid augmentations that modify the start/end/duration times of the cut
        and its supervisions; the stored supervision information would then be
        incorrect. E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
    :return: the cut returned by ``save_audio()``.
    """
    # Pure delegation; the decorator above is what emits the deprecation notice.
    stored_cut = self.save_audio(storage_path=storage_path, augment_fn=augment_fn)
    return stored_cut

def save_audio(
self,
storage_path: Pathlike,
Expand Down
77 changes: 1 addition & 76 deletions lhotse/cut/set.py
Original file line number Diff line number Diff line change
Expand Up @@ -954,37 +954,6 @@ def trim_to_unsupervised_segments(self) -> "CutSet":
cuts.append(cut.truncate(offset=start, duration=end - start))
return CutSet.from_cuts(cuts)

@deprecated(
    "CutSet.mix_same_recording_channels will be removed in a future release. Please use "
    "`combine_same_recording_channels()` instead."
)
def mix_same_recording_channels(self) -> "CutSet":
    """
    Find cuts that come from the same recording and have matching start and end times, but
    represent different channels. Then, mix them together (in matching groups) and return
    a new ``CutSet`` that contains their mixes. This is useful for processing microphone array
    recordings.

    It is intended to be used as the first operation after creating a new ``CutSet`` (but
    might also work in other circumstances, e.g. if it was cut to windows first).

    Example:

        >>> ami = prepare_ami('path/to/ami')
        >>> cut_set = CutSet.from_manifests(recordings=ami['train']['recordings'])
        >>> multi_channel_cut_set = cut_set.mix_same_recording_channels()

    In the AMI example, the ``multi_channel_cut_set`` will yield MixedCuts that hold all single-channel
    Cuts together.

    :return: a new ``CutSet`` with one mixed cut per ``(recording id, start, end)`` group.
    :raises ValueError: when this ``CutSet`` already contains mixed cuts.
    """
    if self.mixed_cuts:
        raise ValueError(
            "This operation is not applicable to CutSet's containing MixedCut's."
        )
    # Imported lazily, as in the original, so cytoolz is only needed when this is called.
    from cytoolz.itertoolz import groupby

    # Cuts sharing recording id, start and end are treated as channels of one segment.
    groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self)
    return CutSet.from_cuts(mix_cuts(cuts) for cuts in groups.values())

def combine_same_recording_channels(self) -> "CutSet":
"""
Find cuts that come from the same recording and have matching start and end times, but
Expand All @@ -1002,8 +971,6 @@ def combine_same_recording_channels(self) -> "CutSet":
In the AMI example, the ``multi_channel_cut_set`` will yield MultiCuts that hold all single-channel
Cuts together.
.. note:: See also :func:`CutSet.mix_same_recording_channels`, which is now deprecated.
"""
if self.mixed_cuts or self.multi_cuts:
raise ValueError(
Expand All @@ -1012,7 +979,7 @@ def combine_same_recording_channels(self) -> "CutSet":
from cytoolz.itertoolz import groupby

groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self)
return CutSet.from_cuts(MultiCut.from_mono(cuts) for cuts in groups.values())
return CutSet.from_cuts(MultiCut.from_mono(*cuts) for cuts in groups.values())

def sort_by_duration(self, ascending: bool = False) -> "CutSet":
"""
Expand Down Expand Up @@ -1865,48 +1832,6 @@ def compute_and_store_features_batch(
# otherwise everything is in memory.
return cuts_writer.open_manifest()

@deprecated(
    "CutSet.compute_and_store_recordings will be removed in a future release. Please use save_audios() instead."
)
def compute_and_store_recordings(
    self,
    storage_path: Pathlike,
    num_jobs: Optional[int] = None,
    executor: Optional[Executor] = None,
    augment_fn: Optional[AugmentFn] = None,
    progress_bar: bool = True,
) -> "CutSet":
    """
    Deprecated alias of ``save_audios()``: every argument is forwarded unchanged.

    :param storage_path: The path to location where the audio recordings will be stored.
        For each cut, a sub-directory will be created that starts with the first 3
        characters of the cut's ID. The audio recording is then stored in the sub-directory
        using the cut ID as filename and '.flac' as suffix.
    :param num_jobs: The number of parallel processes used to store the audio recordings.
        The CutSet is internally split into this many chunks, each processed in parallel.
    :param executor: when provided, will be used to parallelize the process.
        By default, a ProcessPoolExecutor is instantiated.
        Learn more about the ``Executor`` API at
        https://lhotse.readthedocs.io/en/latest/parallelism.html
    :param augment_fn: an optional callable used for audio augmentation.
        Avoid augmentations that modify the start/end/duration times of the cut
        and its supervisions; the stored supervision information would then be
        incorrect. E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
    :param progress_bar: Should a progress bar be displayed (automatically turned off
        for parallel computation).
    :return: the ``CutSet`` returned by ``save_audios()``.
    """
    # Collect the keyword options once so the delegation itself stays a one-liner.
    forwarded_options = dict(
        num_jobs=num_jobs,
        executor=executor,
        augment_fn=augment_fn,
        progress_bar=progress_bar,
    )
    return self.save_audios(storage_path, **forwarded_options)

def save_audios(
self,
storage_path: Pathlike,
Expand Down
16 changes: 7 additions & 9 deletions test/cut/test_cut_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
load_manifest,
)
from lhotse.audio import AudioSource
from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut
from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut, MultiCut
from lhotse.serialization import load_jsonl
from lhotse.testing.dummies import (
DummyManifest,
Expand Down Expand Up @@ -415,7 +415,7 @@ def test_mixed_cut_set_prefix(cut_with_relative_paths):
assert t.cut.features.storage_path == "/data/storage_dir"


def test_mix_same_recording_channels():
def test_combine_same_recording_channels():
recording = Recording(
"rec",
sampling_rate=8000,
Expand All @@ -433,14 +433,12 @@ def test_mix_same_recording_channels():
]
)

mixed = cut_set.mix_same_recording_channels()
assert len(mixed) == 1
multi = cut_set.combine_same_recording_channels()
assert len(multi) == 1

cut = mixed[0]
assert isinstance(cut, MixedCut)
assert len(cut.tracks) == 2
assert cut.tracks[0].cut == cut_set[0]
assert cut.tracks[1].cut == cut_set[1]
cut = multi[0]
assert isinstance(cut, MultiCut)
assert cut.num_channels == 2


def test_cut_set_filter_supervisions(cut_set):
Expand Down

0 comments on commit 7909704

Please sign in to comment.