Skip to content

Commit

Permalink
fix: format
Browse files (browse the repository at this point in the history)
  • Loading branch information
yuta0306 committed Dec 12, 2024
1 parent 0ef24bf commit f01c868
Showing 1 changed file with 4 additions and 6 deletions.
10 changes: 4 additions & 6 deletions lhotse/recipes/reazonspeech.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,8 +82,8 @@ def download_reazonspeech(
:return: the path to downloaded data and the JSON file.
"""
if is_module_available("datasets"):
from datasets import load_dataset, Audio
import soundfile as sf
from datasets import Audio, load_dataset
else:
raise ImportError(
"To process the ReazonSpeech corpus, please install optional dependencies: pip install datasets soundfile"
Expand All @@ -106,21 +106,19 @@ def download_reazonspeech(
cache_dir=corpus_dir,
num_proc=num_jobs,
)["train"]

# Prepare data for JSON export
def format_example(example: dict, idx: int) -> dict:
example["id"] = str(idx)
example["audio_filepath"] = example["audio"]["path"]
example["text"] = normalize(example["transcription"])
example["duration"] = sf.info(
example["audio"]["path"]
).duration
example["duration"] = sf.info(example["audio"]["path"]).duration
return example

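ds = ds.cast_column("audio", Audio(decode=True)) # Hack: don't decode to speedup -- NOTE(review): this comment contradicts the code, which passes decode=True (decoding enabled); confirm which one is intended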
ds = ds.map(
format_example,
with_indices=True,
with_indices=True,
remove_columns=ds.column_names,
num_proc=num_jobs,
)
Expand Down

0 comments on commit f01c868

Please sign in to comment.