Skip to content

Commit

Permalink
Add fix_manifests in all recipes (#1128)
Browse files Browse the repository at this point in the history
* add transform attribute for MixedCut

* add mix_first option in normalize_loudness

* handle the case when mix is called on MixedCut with existing transforms

* add test for mixing with transformed MixedCut

* enhancements and bug fixes

* small changes in some cutset methods

* small fix in error message

* return word alignments from ami recipe

* add word alignments for ICSI

* remove unwanted whitespace

* fix IHM preparation

* remove words with zero or negative duration

* ensure word alignments respect segment boundary

* add save-to-wav option for icsi

* add test for mixing cut with recording

* style fix

* add data prep for voxpopuli

* add fix_manifests for all recipes
  • Loading branch information
desh2608 authored Aug 24, 2023
1 parent c6fa990 commit 6914818
Show file tree
Hide file tree
Showing 56 changed files with 273 additions and 140 deletions.
2 changes: 2 additions & 0 deletions lhotse/recipes/adept.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download

ADEPT_URL = "https://zenodo.org/record/5117102/files/ADEPT.zip"
Expand Down Expand Up @@ -140,6 +141,7 @@ def prepare_adept(
)

supervisions = SupervisionSet.from_segments(supervisions)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aidatatang_200zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -135,6 +136,7 @@ def prepare_aidatatang_200zh(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aishell.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -140,6 +141,7 @@ def prepare_aishell(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
4 changes: 3 additions & 1 deletion lhotse/recipes/aishell2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike

Expand Down Expand Up @@ -73,7 +74,7 @@ def text_normalize(line: str) -> str:
IC0975W0451 明年二月底小成
ID0114W0368 我感觉就是在不断拉抽屉
ID0115W0198 我公司员工不存在持有和泰创投股份的情况
"""
new_line = []
line = list(line)
Expand Down Expand Up @@ -161,6 +162,7 @@ def prepare_aishell2(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
3 changes: 2 additions & 1 deletion lhotse/recipes/aishell3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
validate_recordings_and_supervisions,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import manifests_exist, read_manifests_if_cached
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -159,7 +160,7 @@ def prepare_aishell3(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)

recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aishell4.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract

Expand Down Expand Up @@ -174,6 +175,7 @@ def prepare_aishell4(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
5 changes: 3 additions & 2 deletions lhotse/recipes/ali_meeting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

from tqdm import tqdm

from lhotse import fix_manifests, validate_recordings_and_supervisions
from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import normalize_text_alimeeting
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract
Expand Down Expand Up @@ -204,11 +205,11 @@ def prepare_ali_meeting(
)
supervisions.append(segment)

# Fix manifests
recording_set, supervision_set = fix_manifests(
RecordingSet.from_recordings(recordings),
SupervisionSet.from_segments(supervisions),
)
# Fix manifests
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
3 changes: 2 additions & 1 deletion lhotse/recipes/aspire.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
from pathlib import Path
from typing import Dict, NamedTuple, Optional, Union

from lhotse import fix_manifests, validate_recordings_and_supervisions
from lhotse import validate_recordings_and_supervisions
from lhotse.audio import AudioSource, Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, Seconds

Expand Down
6 changes: 6 additions & 0 deletions lhotse/recipes/atcosim.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import (
Pathlike,
Expand Down Expand Up @@ -245,4 +246,9 @@ def prepare_atcosim(

recordings = RecordingSet.from_jsonl_lazy(recs_writer.path)
supervisions = SupervisionSet.from_jsonl_lazy(sups_writer.path)

logging.warning(
"Manifests are lazily materialized. You may want to call `lhotse.qa.fix_manifests()`"
" to ensure that all supervisions fall within the corresponding recordings."
)
return recordings, supervisions
2 changes: 2 additions & 0 deletions lhotse/recipes/audio_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.serialization import load_json
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download
Expand Down Expand Up @@ -132,6 +133,7 @@ def prepare_audio_mnist(
)

supervisions = SupervisionSet.from_segments(supervisions)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
5 changes: 5 additions & 0 deletions lhotse/recipes/bengaliai_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
set_ffmpeg_torchaudio_info_enabled,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests, validate_recordings_and_supervisions
from lhotse.recipes.utils import manifests_exist
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike
Expand Down Expand Up @@ -189,6 +190,10 @@ def prepare_bengaliai_speech(
num_jobs=num_jobs,
)

# Fix manifests
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
supervision_set.to_file(
output_dir / f"bengaliai_speech_supervisions_{part}.jsonl.gz"
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/broadcast_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, check_and_rglob, recursion_limit

Expand Down Expand Up @@ -65,6 +66,7 @@ def prepare_broadcast_news(
chain.from_iterable(sups["segments"] for sups in supervisions_list)
)

recordings, segment_supervisions = fix_manifests(recordings, segment_supervisions)
validate_recordings_and_supervisions(recordings, segment_supervisions)

if output_dir is not None:
Expand Down
21 changes: 21 additions & 0 deletions lhotse/recipes/bvcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike


Expand Down Expand Up @@ -76,6 +77,11 @@ def prepare_bvcc(
)
)
main1_dev_recs = main1_recs.filter(lambda rec: rec.id in main1_dev_sup)

# Fix manifests
main1_dev_recs, main1_dev_sup = fix_manifests(main1_dev_recs, main1_dev_sup)
validate_recordings_and_supervisions(main1_dev_recs, main1_dev_sup)

manifests["main1_dev"] = {
"recordings": main1_dev_recs,
"supervisions": main1_dev_sup,
Expand All @@ -90,6 +96,11 @@ def prepare_bvcc(
)
)
main1_train_recs = main1_recs.filter(lambda rec: rec.id in main1_train_sup)

# Fix manifests
main1_train_recs, main1_train_sup = fix_manifests(main1_train_recs, main1_train_sup)
validate_recordings_and_supervisions(main1_train_recs, main1_train_sup)

manifests["main1_train"] = {
"recordings": main1_train_recs,
"supervisions": main1_train_sup,
Expand Down Expand Up @@ -134,6 +145,11 @@ def prepare_bvcc(
)
)
ood1_dev_recs = ood1_recs.filter(lambda rec: rec.id in ood1_dev_sup)

# Fix manifests
ood1_dev_recs, ood1_dev_sup = fix_manifests(ood1_dev_recs, ood1_dev_sup)
validate_recordings_and_supervisions(ood1_dev_recs, ood1_dev_sup)

manifests["ood1_dev"] = {
"recordings": ood1_dev_recs,
"supervisions": ood1_dev_sup,
Expand All @@ -148,6 +164,11 @@ def prepare_bvcc(
)
)
ood1_train_recs = ood1_recs.filter(lambda rec: rec.id in ood1_train_sup)

# Fix manifests
ood1_train_recs, ood1_train_sup = fix_manifests(ood1_train_recs, ood1_train_sup)
validate_recordings_and_supervisions(ood1_train_recs, ood1_train_sup)

manifests["ood1_train"] = {
"recordings": ood1_train_recs,
"supervisions": ood1_train_sup,
Expand Down
6 changes: 2 additions & 4 deletions lhotse/recipes/cmu_arctic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import remove_missing_recordings_and_supervisions
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download, safe_extract

BASE_URL = "http://festvox.org/cmu_arctic/packed/"
Expand Down Expand Up @@ -167,9 +167,7 @@ def prepare_cmu_arctic(
supervisions = SupervisionSet.from_segments(supervisions)

# There seem to be 20 recordings missing; remove them before validation
recordings, supervisions = remove_missing_recordings_and_supervisions(
recordings, supervisions
)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
6 changes: 2 additions & 4 deletions lhotse/recipes/cmu_indic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import remove_missing_recordings_and_supervisions
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download, safe_extract

BASE_URL = "http://festvox.org/h2r_indic/"
Expand Down Expand Up @@ -194,9 +194,7 @@ def prepare_cmu_indic(
supervisions = SupervisionSet.from_segments(supervisions)

# There seem to be 20 recordings missing; remove them before validation
recordings, supervisions = remove_missing_recordings_and_supervisions(
recordings, supervisions
)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
18 changes: 10 additions & 8 deletions lhotse/recipes/cmu_kids.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@
Summary of corpus from LDC webpage:
This database is comprised of sentences read aloud by children. It was originally designed
in order to create a training set of children's speech for the SPHINX II automatic speech
This database is comprised of sentences read aloud by children. It was originally designed
in order to create a training set of children's speech for the SPHINX II automatic speech
recognizer for its use in the LISTEN project at Carnegie Mellon University.
The children range in age from six to eleven (see details below) and were in first through
third grades (the 11-year-old was in 6th grade) at the time of recording. There were 24 male
The children range in age from six to eleven (see details below) and were in first through
third grades (the 11-year-old was in 6th grade) at the time of recording. There were 24 male
and 52 female speakers. There are 5,180 utterances in all.
The speakers come from two separate populations:
1. SIM95: They were recorded in the summer of 1995 and were enrolled in either the Chatham
College Summer Camp or the Mount Lebanon Extended Day Summer Fun program in Pittsburgh.
1. SIM95: They were recorded in the summer of 1995 and were enrolled in either the Chatham
College Summer Camp or the Mount Lebanon Extended Day Summer Fun program in Pittsburgh.
They were recorded on-site. There are 44 speakers and 3,333 utterances in this set. They are
"good" reading examples.
2. FP: These are examples of errorful reading and dialectic variants. The readers come from
Fort Pitt School in Pittsburgh and were recorded in April 1996. There are 32 speakers and
2. FP: These are examples of errorful reading and dialectic variants. The readers come from
Fort Pitt School in Pittsburgh and were recorded in April 1996. There are 32 speakers and
1,847 utterances in this set.
The user should be aware that the speakers' dialect partly reflects what is locally called "Pittsburghese."
Expand All @@ -36,6 +36,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike

Expand Down Expand Up @@ -129,6 +130,7 @@ def prepare_cmu_kids(
recordings = RecordingSet.from_recordings(recordings)
supervisions = SupervisionSet.from_segments(supervisions)

recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

manifests = {
Expand Down
7 changes: 7 additions & 0 deletions lhotse/recipes/commonvoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
validate_recordings_and_supervisions,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract

Expand Down Expand Up @@ -305,6 +306,12 @@ def prepare_commonvoice(
num_jobs=num_jobs,
)

# Fix manifests
recording_set, supervision_set = fix_manifests(
recording_set, supervision_set
)
validate_recordings_and_supervisions(recording_set, supervision_set)

supervision_set.to_file(
output_dir / f"cv-{lang}_supervisions_{part}.jsonl.gz"
)
Expand Down
5 changes: 5 additions & 0 deletions lhotse/recipes/csj.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import manifests_exist, read_manifests_if_cached
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike
Expand Down Expand Up @@ -889,6 +890,10 @@ def prepare_manifests(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)

recording_set, supervision_set = fix_manifests(
recording_set, supervision_set
)
validate_recordings_and_supervisions(recording_set, supervision_set)

if manifest_dir:
Expand Down
Loading

0 comments on commit 6914818

Please sign in to comment.