diff --git a/lhotse/cut/base.py b/lhotse/cut/base.py index 98b2f5f2c..752f7c2ee 100644 --- a/lhotse/cut/base.py +++ b/lhotse/cut/base.py @@ -576,27 +576,6 @@ def index_supervisions( ) return indexed - @deprecated( - "Cut.compute_and_store_recording will be removed in a future release. Please use save_audio() instead." - ) - def compute_and_store_recording( - self, - storage_path: Pathlike, - augment_fn: Optional[AugmentFn] = None, - ) -> "Cut": - """ - Store this cut's waveform as audio recording to disk. - - :param storage_path: The path to location where we will store the audio recordings. - :param augment_fn: an optional callable used for audio augmentation. - Be careful with the types of augmentations used: if they modify - the start/end/duration times of the cut and its supervisions, - you will end up with incorrect supervision information when using this API. - E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead. - :return: a new MonoCut instance. - """ - return self.save_audio(storage_path=storage_path, augment_fn=augment_fn) - def save_audio( self, storage_path: Pathlike, diff --git a/lhotse/cut/set.py b/lhotse/cut/set.py index 7ea535096..c8c3a9bc0 100644 --- a/lhotse/cut/set.py +++ b/lhotse/cut/set.py @@ -954,37 +954,6 @@ def trim_to_unsupervised_segments(self) -> "CutSet": cuts.append(cut.truncate(offset=start, duration=end - start)) return CutSet.from_cuts(cuts) - @deprecated( - "Cut.mix_same_recording_channels will be removed in a future release. Please use " - "`combine_same_recording_channels()` instead." - ) - def mix_same_recording_channels(self) -> "CutSet": - """ - Find cuts that come from the same recording and have matching start and end times, but - represent different channels. Then, mix them together (in matching groups) and return - a new ``CutSet`` that contains their mixes. This is useful for processing microphone array - recordings. - - It is intended to be used as the first operation after creating a new ``CutSet`` (but - might also work in other circumstances, e.g. if it was cut to windows first). - - Example: - >>> ami = prepare_ami('path/to/ami') - >>> cut_set = CutSet.from_manifests(recordings=ami['train']['recordings']) - >>> multi_channel_cut_set = cut_set.mix_same_recording_channels() - - In the AMI example, the ``multi_channel_cut_set`` will yield MixedCuts that hold all single-channel - Cuts together. - """ - if self.mixed_cuts: - raise ValueError( - "This operation is not applicable to CutSet's containing MixedCut's." - ) - from cytoolz.itertoolz import groupby - - groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self) - return CutSet.from_cuts(mix_cuts(cuts) for cuts in groups.values()) - def combine_same_recording_channels(self) -> "CutSet": """ Find cuts that come from the same recording and have matching start and end times, but @@ -1002,8 +971,6 @@ def combine_same_recording_channels(self) -> "CutSet": In the AMI example, the ``multi_channel_cut_set`` will yield MultiCuts that hold all single-channel Cuts together. - - .. note:: See also :func:`CutSet.mix_same_recording_channels`, which is now deprecated. """ if self.mixed_cuts or self.multi_cuts: raise ValueError( @@ -1012,7 +979,7 @@ def combine_same_recording_channels(self) -> "CutSet": from cytoolz.itertoolz import groupby groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self) - return CutSet.from_cuts(MultiCut.from_mono(cuts) for cuts in groups.values()) + return CutSet.from_cuts(MultiCut.from_mono(*cuts) for cuts in groups.values()) def sort_by_duration(self, ascending: bool = False) -> "CutSet": """ @@ -1865,48 +1832,6 @@ def compute_and_store_features_batch( # otherwise everything is in memory. return cuts_writer.open_manifest() - @deprecated( - "CutSet.compute_and_store_recordings will be removed in a future release. Please use save_audios() instead." - ) - def compute_and_store_recordings( - self, - storage_path: Pathlike, - num_jobs: Optional[int] = None, - executor: Optional[Executor] = None, - augment_fn: Optional[AugmentFn] = None, - progress_bar: bool = True, - ) -> "CutSet": - """ - Store waveforms of all cuts as audio recordings to disk. - - :param storage_path: The path to location where we will store the audio recordings. - For each cut, a sub-directory will be created that starts with the first 3 - characters of the cut's ID. The audio recording is then stored in the sub-directory - using the cut ID as filename and '.flac' as suffix. - :param num_jobs: The number of parallel processes used to store the audio recordings. - We will internally split the CutSet into this many chunks - and process each chunk in parallel. - :param augment_fn: an optional callable used for audio augmentation. - Be careful with the types of augmentations used: if they modify - the start/end/duration times of the cut and its supervisions, - you will end up with incorrect supervision information when using this API. - E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead. - :param executor: when provided, will be used to parallelize the process. - By default, we will instantiate a ProcessPoolExecutor. - Learn more about the ``Executor`` API at - https://lhotse.readthedocs.io/en/latest/parallelism.html - :param progress_bar: Should a progress bar be displayed (automatically turned off - for parallel computation). - :return: Returns a new ``CutSet``. - """ - return self.save_audios( - storage_path, - num_jobs=num_jobs, - executor=executor, - augment_fn=augment_fn, - progress_bar=progress_bar, - ) - def save_audios( self, storage_path: Pathlike, diff --git a/test/cut/test_cut_set.py b/test/cut/test_cut_set.py index cf5fda803..65be02514 100644 --- a/test/cut/test_cut_set.py +++ b/test/cut/test_cut_set.py @@ -17,7 +17,7 @@ load_manifest, ) from lhotse.audio import AudioSource -from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut +from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut, MultiCut from lhotse.serialization import load_jsonl from lhotse.testing.dummies import ( DummyManifest, @@ -415,7 +415,7 @@ def test_mixed_cut_set_prefix(cut_with_relative_paths): assert t.cut.features.storage_path == "/data/storage_dir" -def test_mix_same_recording_channels(): +def test_combine_same_recording_channels(): recording = Recording( "rec", sampling_rate=8000, @@ -433,14 +433,12 @@ def test_mix_same_recording_channels(): ] ) - mixed = cut_set.mix_same_recording_channels() - assert len(mixed) == 1 + multi = cut_set.combine_same_recording_channels() + assert len(multi) == 1 - cut = mixed[0] - assert isinstance(cut, MixedCut) - assert len(cut.tracks) == 2 - assert cut.tracks[0].cut == cut_set[0] - assert cut.tracks[1].cut == cut_set[1] + cut = multi[0] + assert isinstance(cut, MultiCut) + assert cut.num_channels == 2 def test_cut_set_filter_supervisions(cut_set):