Remove some deprecated methods (#900)

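Removes the following deprecated methods:
* `Cut.compute_and_store_recording()` (use `save_audio()` instead)
* `CutSet.compute_and_store_recordings()` (use `save_audios()` instead)
* `CutSet.mix_same_recording_channels()` (use `combine_same_recording_channels()` instead)

Also changes `CutSet.combine_same_recording_channels()` to unpack each group of cuts when calling `MultiCut.from_mono()`, and retargets the former `mix_same_recording_channels` test at it.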
lhotse-speech · Nov 22, 2022 · 7909704 · 7909704
2 parents 4defcff + e59e140
commit 7909704
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 106 deletions.
diff --git a/lhotse/cut/base.py b/lhotse/cut/base.py
@@ -576,27 +576,6 @@ def index_supervisions(
                     )
         return indexed
 
-    @deprecated(
-        "Cut.compute_and_store_recording will be removed in a future release. Please use save_audio() instead."
-    )
-    def compute_and_store_recording(
-        self,
-        storage_path: Pathlike,
-        augment_fn: Optional[AugmentFn] = None,
-    ) -> "Cut":
-        """
-        Store this cut's waveform as audio recording to disk.
-
-        :param storage_path: The path to location where we will store the audio recordings.
-        :param augment_fn: an optional callable used for audio augmentation.
-            Be careful with the types of augmentations used: if they modify
-            the start/end/duration times of the cut and its supervisions,
-            you will end up with incorrect supervision information when using this API.
-            E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
-        :return: a new MonoCut instance.
-        """
-        return self.save_audio(storage_path=storage_path, augment_fn=augment_fn)
-
     def save_audio(
         self,
         storage_path: Pathlike,

diff --git a/lhotse/cut/set.py b/lhotse/cut/set.py
@@ -954,37 +954,6 @@ def trim_to_unsupervised_segments(self) -> "CutSet":
                 cuts.append(cut.truncate(offset=start, duration=end - start))
         return CutSet.from_cuts(cuts)
 
-    @deprecated(
-        "Cut.mix_same_recording_channels will be removed in a future release. Please use "
-        "`combine_same_recording_channels()` instead."
-    )
-    def mix_same_recording_channels(self) -> "CutSet":
-        """
-        Find cuts that come from the same recording and have matching start and end times, but
-        represent different channels. Then, mix them together (in matching groups) and return
-        a new ``CutSet`` that contains their mixes. This is useful for processing microphone array
-        recordings.
-
-        It is intended to be used as the first operation after creating a new ``CutSet`` (but
-        might also work in other circumstances, e.g. if it was cut to windows first).
-
-        Example:
-            >>> ami = prepare_ami('path/to/ami')
-            >>> cut_set = CutSet.from_manifests(recordings=ami['train']['recordings'])
-            >>> multi_channel_cut_set = cut_set.mix_same_recording_channels()
-
-        In the AMI example, the ``multi_channel_cut_set`` will yield MixedCuts that hold all single-channel
-        Cuts together.
-        """
-        if self.mixed_cuts:
-            raise ValueError(
-                "This operation is not applicable to CutSet's containing MixedCut's."
-            )
-        from cytoolz.itertoolz import groupby
-
-        groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self)
-        return CutSet.from_cuts(mix_cuts(cuts) for cuts in groups.values())
-
     def combine_same_recording_channels(self) -> "CutSet":
         """
         Find cuts that come from the same recording and have matching start and end times, but
@@ -1002,8 +971,6 @@ def combine_same_recording_channels(self) -> "CutSet":
 
         In the AMI example, the ``multi_channel_cut_set`` will yield MultiCuts that hold all single-channel
         Cuts together.
-
-        .. note:: See also :func:`CutSet.mix_same_recording_channels`, which is now deprecated.
         """
         if self.mixed_cuts or self.multi_cuts:
             raise ValueError(
@@ -1012,7 +979,7 @@ def combine_same_recording_channels(self) -> "CutSet":
         from cytoolz.itertoolz import groupby
 
         groups = groupby(lambda cut: (cut.recording.id, cut.start, cut.end), self)
-        return CutSet.from_cuts(MultiCut.from_mono(cuts) for cuts in groups.values())
+        return CutSet.from_cuts(MultiCut.from_mono(*cuts) for cuts in groups.values())
 
     def sort_by_duration(self, ascending: bool = False) -> "CutSet":
         """
@@ -1865,48 +1832,6 @@ def compute_and_store_features_batch(
         # otherwise everything is in memory.
         return cuts_writer.open_manifest()
 
-    @deprecated(
-        "CutSet.compute_and_store_recordings will be removed in a future release. Please use save_audios() instead."
-    )
-    def compute_and_store_recordings(
-        self,
-        storage_path: Pathlike,
-        num_jobs: Optional[int] = None,
-        executor: Optional[Executor] = None,
-        augment_fn: Optional[AugmentFn] = None,
-        progress_bar: bool = True,
-    ) -> "CutSet":
-        """
-        Store waveforms of all cuts as audio recordings to disk.
-
-        :param storage_path: The path to location where we will store the audio recordings.
-            For each cut, a sub-directory will be created that starts with the first 3
-            characters of the cut's ID. The audio recording is then stored in the sub-directory
-            using the cut ID as filename and '.flac' as suffix.
-        :param num_jobs: The number of parallel processes used to store the audio recordings.
-            We will internally split the CutSet into this many chunks
-            and process each chunk in parallel.
-        :param augment_fn: an optional callable used for audio augmentation.
-            Be careful with the types of augmentations used: if they modify
-            the start/end/duration times of the cut and its supervisions,
-            you will end up with incorrect supervision information when using this API.
-            E.g. for speed perturbation, use ``CutSet.perturb_speed()`` instead.
-        :param executor: when provided, will be used to parallelize the process.
-            By default, we will instantiate a ProcessPoolExecutor.
-            Learn more about the ``Executor`` API at
-            https://lhotse.readthedocs.io/en/latest/parallelism.html
-        :param progress_bar: Should a progress bar be displayed (automatically turned off
-            for parallel computation).
-        :return: Returns a new ``CutSet``.
-        """
-        return self.save_audios(
-            storage_path,
-            num_jobs=num_jobs,
-            executor=executor,
-            augment_fn=augment_fn,
-            progress_bar=progress_bar,
-        )
-
     def save_audios(
         self,
         storage_path: Pathlike,

diff --git a/test/cut/test_cut_set.py b/test/cut/test_cut_set.py
@@ -17,7 +17,7 @@
     load_manifest,
 )
 from lhotse.audio import AudioSource
-from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut
+from lhotse.cut import CutSet, MixedCut, MixTrack, MonoCut, MultiCut
 from lhotse.serialization import load_jsonl
 from lhotse.testing.dummies import (
     DummyManifest,
@@ -415,7 +415,7 @@ def test_mixed_cut_set_prefix(cut_with_relative_paths):
             assert t.cut.features.storage_path == "/data/storage_dir"
 
 
-def test_mix_same_recording_channels():
+def test_combine_same_recording_channels():
     recording = Recording(
         "rec",
         sampling_rate=8000,
@@ -433,14 +433,12 @@ def test_mix_same_recording_channels():
         ]
     )
 
-    mixed = cut_set.mix_same_recording_channels()
-    assert len(mixed) == 1
+    multi = cut_set.combine_same_recording_channels()
+    assert len(multi) == 1
 
-    cut = mixed[0]
-    assert isinstance(cut, MixedCut)
-    assert len(cut.tracks) == 2
-    assert cut.tracks[0].cut == cut_set[0]
-    assert cut.tracks[1].cut == cut_set[1]
+    cut = multi[0]
+    assert isinstance(cut, MultiCut)
+    assert cut.num_channels == 2
 
 
 def test_cut_set_filter_supervisions(cut_set):