Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* Adds fleurs recipe

* Black formatting

* Removes the unused num_jobs argument from the download CLI, and re-runs isort and black on recipes/fleurs.py

* Removes what appears to be an unnecessary set_ffmpeg_torchaudio_info call

* isort and black fix

* Fixes remaining black issues due to trailing space in recipes/__init__.py

* Adds FLEURS entry in docs/corpus.rst
  • Loading branch information
m-wiesner authored and Your Name committed Jan 7, 2025
1 parent eeafc82 commit 0545b7b
Show file tree
Hide file tree
Showing 5 changed files with 484 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ a CLI tool that create the manifests given a corpus directory.
- :func:`lhotse.recipes.prepare_fisher_english`
* - Fisher Spanish
- :func:`lhotse.recipes.prepare_fisher_spanish`
* - FLEURS
- :func:`lhotse.recipes.prepare_fleurs`
* - Fluent Speech Commands
- :func:`lhotse.recipes.slu`
* - GALE Arabic Broadcast Speech
Expand Down
1 change: 1 addition & 0 deletions lhotse/bin/modes/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from .eval2000 import *
from .fisher_english import *
from .fisher_spanish import *
from .fleurs import *
from .gale_arabic import *
from .gale_mandarin import *
from .gigaspeech import *
Expand Down
68 changes: 68 additions & 0 deletions lhotse/bin/modes/recipes/fleurs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import Optional, Sequence, Union

import click

from lhotse.bin.modes import download, prepare
from lhotse.recipes.fleurs import download_fleurs, prepare_fleurs
from lhotse.utils import Pathlike

__all__ = ["fleurs"]


@prepare.command(context_settings=dict(show_default=True))
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "-j",
    "--num-jobs",
    type=int,
    default=1,
    help="How many threads to use (can give good speed-ups with slow disks).",
)
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # The usage example has to name this recipe and its two positional
    # arguments; it previously showed a misspelled command for another corpus.
    help="Specify which languages to prepare, e.g., "
    "lhotse prepare fleurs corpus_dir output_dir -l hi_in -l en_us",
)
def fleurs(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    num_jobs: int,
    lang: Optional[Union[str, Sequence[str]]],
):
    """Fleurs ASR data preparation."""
    # NOTE: click shows the docstring above as this command's --help text,
    # so developer-facing notes live in comments instead.
    #
    # corpus_dir: existing directory holding the corpus (click checks it exists).
    # output_dir: path forwarded to the recipe as ``output_dir``.
    # num_jobs:   value of -j/--num-jobs, forwarded unchanged.
    # lang:       every -l/--lang value collected by click (``multiple=True``),
    #             or the default ``["all"]`` when the option is not given.
    prepare_fleurs(corpus_dir, output_dir=output_dir, num_jobs=num_jobs, languages=lang)


@download.command(context_settings=dict(show_default=True))
@click.argument("target_dir", type=click.Path())
@click.option(
    "-l",
    "--lang",
    multiple=True,
    default=["all"],
    # Keep a single, runnable example: TARGET_DIR is a required argument, so
    # an invocation without it (as the old text suggested) would be rejected.
    help="Specify which languages to download, e.g., "
    "lhotse download fleurs . -l hi_in -l en_us "
    "(the option can be given multiple times).",
)
@click.option(
    "--force-download",
    type=bool,
    is_flag=True,
    default=False,
    help="Specify whether to overwrite an existing archive",
)
def fleurs(
    target_dir: Pathlike,
    lang: Optional[Union[str, Sequence[str]]],
    force_download: bool = False,
):
    """FLEURS download."""
    # NOTE: click shows the docstring above as this command's --help text,
    # so developer-facing notes live in comments instead.
    #
    # This function reuses the name ``fleurs`` on purpose: each decorator
    # registers its command on its own group (``prepare`` / ``download``) at
    # definition time, so rebinding the module-level name afterwards is harmless.
    #
    # target_dir:     path forwarded to the recipe as its first argument.
    # lang:           every -l/--lang value collected by click, or ``["all"]``.
    # force_download: True when --force-download is passed on the command line.
    download_fleurs(
        target_dir,
        languages=lang,
        force_download=force_download,
    )
3 changes: 3 additions & 0 deletions lhotse/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from .eval2000 import prepare_eval2000
from .fisher_english import prepare_fisher_english
from .fisher_spanish import prepare_fisher_spanish
from .fleurs import download_fleurs, prepare_fleurs
from .gale_arabic import prepare_gale_arabic
from .gale_mandarin import prepare_gale_mandarin
from .gigaspeech import prepare_gigaspeech
Expand Down Expand Up @@ -147,6 +148,8 @@
"prepare_eval2000",
"prepare_fisher_english",
"prepare_fisher_spanish",
"download_fleurs",
"prepare_fleurs",
"prepare_gale_arabic",
"prepare_gale_mandarin",
"prepare_gigaspeech",
Expand Down
Loading

0 comments on commit 0545b7b

Please sign in to comment.