Skip to content

Commit a7303fc

Browse files
authored
add dataset-parts argument to libritts (#956)
2 parents c1a1268 + 4ec92d0 commit a7303fc

File tree

2 files changed

+36
-3
lines changed

2 files changed

+36
-3
lines changed

lhotse/bin/modes/recipes/libritts.py

+24-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from typing import Sequence
2+
13
import click
24

35
from lhotse.bin.modes import download, prepare
@@ -27,9 +29,19 @@
2729
"66% of utterances have previous utterance."
2830
),
2931
)
32+
@click.option(
33+
"-p",
34+
"--dataset-parts",
35+
type=str,
36+
default=["auto"],
37+
multiple=True,
38+
help="List of dataset parts to prepare. To prepare multiple parts, pass each with `-p` "
39+
"Example: `-p train-clean-360 -p dev-other`",
40+
)
3041
def libritts(
3142
corpus_dir: Pathlike,
3243
output_dir: Pathlike,
44+
dataset_parts: Sequence[str],
3345
num_jobs: int,
3446
link_previous_utterance: bool,
3547
):
@@ -38,14 +50,25 @@ def libritts(
3850
corpus_dir,
3951
output_dir=output_dir,
4052
num_jobs=num_jobs,
53+
dataset_parts=dataset_parts,
4154
link_previous_utt=link_previous_utterance,
4255
)
4356

4457

4558
@download.command(context_settings=dict(show_default=True))
4659
@click.argument("target_dir", type=click.Path())
60+
@click.option(
61+
"-p",
62+
"--dataset-parts",
63+
type=str,
64+
default=["auto"],
65+
multiple=True,
66+
help="List of dataset parts to download. To prepare multiple parts, pass each with `-p` "
67+
"Example: `-p train-clean-360 -p dev-other`",
68+
)
4769
def libritts(
4870
target_dir: Pathlike,
71+
dataset_parts: Sequence[str],
4972
):
5073
"""LibriTTS data download."""
51-
download_libritts(target_dir)
74+
download_libritts(target_dir, dataset_parts=dataset_parts)

lhotse/workflows/whisper.py

+12-2
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,21 @@ def annotate_with_whisper(
5252

5353
if isinstance(manifest, RecordingSet):
5454
yield from _annotate_recordings(
55-
manifest, model_name, device, force_nonoverlapping, download_root, **decode_options
55+
manifest,
56+
model_name,
57+
device,
58+
force_nonoverlapping,
59+
download_root,
60+
**decode_options,
5661
)
5762
elif isinstance(manifest, CutSet):
5863
yield from _annotate_cuts(
59-
manifest, model_name, device, force_nonoverlapping, download_root, **decode_options
64+
manifest,
65+
model_name,
66+
device,
67+
force_nonoverlapping,
68+
download_root,
69+
**decode_options,
6070
)
6171
else:
6272
raise ValueError("The ``manifest`` must be either a RecordingSet or a CutSet.")

0 commit comments

Comments
 (0)