Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fix_manifests in all recipes #1128

Merged
merged 30 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
ce0f5c1
add transform attribute for MixedCut
desh2608 Apr 20, 2023
ab18682
add mix_first option in normalize_loudness
desh2608 Apr 20, 2023
e4bca74
handle the case when mix is called on MixedCut with existing transforms
desh2608 Apr 20, 2023
71a9236
add test for mixing with transformed MixedCut
desh2608 Apr 20, 2023
2e54646
enhancements and bug fixes
desh2608 May 16, 2023
db37a75
small changes in some cutset methods
desh2608 May 16, 2023
7b59ecd
small fix in error message
desh2608 May 16, 2023
a64727a
return word alignments from ami recipe
desh2608 May 17, 2023
850ce2c
add word alignments for ICSI
desh2608 May 18, 2023
4b39c6f
remove unwanted whitespace
desh2608 May 18, 2023
3c16b90
fix IHM preparation
desh2608 May 18, 2023
9921575
remove words with zero or negative duration
desh2608 May 18, 2023
dba413f
ensure word alignments respect segment boundary
desh2608 May 18, 2023
12be424
add save-to-wav option for icsi
desh2608 May 22, 2023
c4b957d
add test for mixing cut with recording
desh2608 May 22, 2023
04ca4aa
Merge branch 'ami_icsi'
desh2608 May 22, 2023
fef3aa3
Merge branch 'cuts'
desh2608 May 22, 2023
0de443e
Merge branch 'mixed_cut_transform'
desh2608 May 22, 2023
80619bb
Merge branch 'master' of https://github.com/lhotse-speech/lhotse
desh2608 Jun 8, 2023
752be69
style fix
desh2608 Jun 8, 2023
5bd483d
Merge branch 'master' of https://github.com/lhotse-speech/lhotse
desh2608 Jun 11, 2023
68f3ffd
Merge branch 'loudness_fix'
desh2608 Jun 11, 2023
2171d7e
add data prep for voxpopuli
desh2608 Jun 12, 2023
df32e5c
Merge branch 'master' of https://github.com/lhotse-speech/lhotse
desh2608 Jun 15, 2023
67c9223
Merge branch 'recipe/voxpopuli'
desh2608 Jun 28, 2023
0e36e3f
fix merge conflicts
desh2608 Aug 11, 2023
399834b
Merge branch 'master' of https://github.com/lhotse-speech/lhotse
desh2608 Aug 23, 2023
71ed2e1
Merge branch 'master' of https://github.com/lhotse-speech/lhotse into…
desh2608 Aug 23, 2023
c4fe1c9
add fix_manifests for all recipes
desh2608 Aug 23, 2023
2d1ff3c
Merge branch 'master' into fix_manifests
pzelasko Aug 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lhotse/recipes/adept.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download

ADEPT_URL = "https://zenodo.org/record/5117102/files/ADEPT.zip"
Expand Down Expand Up @@ -140,6 +141,7 @@ def prepare_adept(
)

supervisions = SupervisionSet.from_segments(supervisions)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aidatatang_200zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -135,6 +136,7 @@ def prepare_aidatatang_200zh(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aishell.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -140,6 +141,7 @@ def prepare_aishell(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
4 changes: 3 additions & 1 deletion lhotse/recipes/aishell2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike

Expand Down Expand Up @@ -73,7 +74,7 @@ def text_normalize(line: str) -> str:
IC0975W0451 明年二月底小成
ID0114W0368 我感觉就是在不断拉抽屉
ID0115W0198 我公司员工不存在持有和泰创投股份的情况

"""
new_line = []
line = list(line)
Expand Down Expand Up @@ -161,6 +162,7 @@ def prepare_aishell2(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
3 changes: 2 additions & 1 deletion lhotse/recipes/aishell3.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
validate_recordings_and_supervisions,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import manifests_exist, read_manifests_if_cached
from lhotse.utils import Pathlike, resumable_download, safe_extract

Expand Down Expand Up @@ -159,7 +160,7 @@ def prepare_aishell3(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)

recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/aishell4.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract

Expand Down Expand Up @@ -174,6 +175,7 @@ def prepare_aishell4(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
5 changes: 3 additions & 2 deletions lhotse/recipes/ali_meeting.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

from tqdm import tqdm

from lhotse import fix_manifests, validate_recordings_and_supervisions
from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import normalize_text_alimeeting
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract
Expand Down Expand Up @@ -204,11 +205,11 @@ def prepare_ali_meeting(
)
supervisions.append(segment)

# Fix manifests
recording_set, supervision_set = fix_manifests(
RecordingSet.from_recordings(recordings),
SupervisionSet.from_segments(supervisions),
)
# Fix manifests
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
Expand Down
3 changes: 2 additions & 1 deletion lhotse/recipes/aspire.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
from pathlib import Path
from typing import Dict, NamedTuple, Optional, Union

from lhotse import fix_manifests, validate_recordings_and_supervisions
from lhotse import validate_recordings_and_supervisions
from lhotse.audio import AudioSource, Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, Seconds

Expand Down
6 changes: 6 additions & 0 deletions lhotse/recipes/atcosim.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import (
Pathlike,
Expand Down Expand Up @@ -245,4 +246,9 @@ def prepare_atcosim(

recordings = RecordingSet.from_jsonl_lazy(recs_writer.path)
supervisions = SupervisionSet.from_jsonl_lazy(sups_writer.path)

logging.warning(
"Manifests are lazily materialized. You may want to call `lhotse.qa.fix_manifests()`"
" to ensure that all supervisions fall within the corresponding recordings."
)
return recordings, supervisions
2 changes: 2 additions & 0 deletions lhotse/recipes/audio_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.serialization import load_json
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, resumable_download
Expand Down Expand Up @@ -132,6 +133,7 @@ def prepare_audio_mnist(
)

supervisions = SupervisionSet.from_segments(supervisions)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
5 changes: 5 additions & 0 deletions lhotse/recipes/bengaliai_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
set_ffmpeg_torchaudio_info_enabled,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests, validate_recordings_and_supervisions
from lhotse.recipes.utils import manifests_exist
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike
Expand Down Expand Up @@ -189,6 +190,10 @@ def prepare_bengaliai_speech(
num_jobs=num_jobs,
)

# Fix manifests
recording_set, supervision_set = fix_manifests(recording_set, supervision_set)
validate_recordings_and_supervisions(recording_set, supervision_set)

if output_dir is not None:
supervision_set.to_file(
output_dir / f"bengaliai_speech_supervisions_{part}.jsonl.gz"
Expand Down
2 changes: 2 additions & 0 deletions lhotse/recipes/broadcast_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, check_and_rglob, recursion_limit

Expand Down Expand Up @@ -65,6 +66,7 @@ def prepare_broadcast_news(
chain.from_iterable(sups["segments"] for sups in supervisions_list)
)

recordings, segment_supervisions = fix_manifests(recordings, segment_supervisions)
validate_recordings_and_supervisions(recordings, segment_supervisions)

if output_dir is not None:
Expand Down
21 changes: 21 additions & 0 deletions lhotse/recipes/bvcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike


Expand Down Expand Up @@ -76,6 +77,11 @@ def prepare_bvcc(
)
)
main1_dev_recs = main1_recs.filter(lambda rec: rec.id in main1_dev_sup)

# Fix manifests
main1_dev_recs, main1_dev_sup = fix_manifests(main1_dev_recs, main1_dev_sup)
validate_recordings_and_supervisions(main1_dev_recs, main1_dev_sup)

manifests["main1_dev"] = {
"recordings": main1_dev_recs,
"supervisions": main1_dev_sup,
Expand All @@ -90,6 +96,11 @@ def prepare_bvcc(
)
)
main1_train_recs = main1_recs.filter(lambda rec: rec.id in main1_train_sup)

# Fix manifests
main1_train_recs, main1_train_sup = fix_manifests(main1_train_recs, main1_train_sup)
validate_recordings_and_supervisions(main1_train_recs, main1_train_sup)

manifests["main1_train"] = {
"recordings": main1_train_recs,
"supervisions": main1_train_sup,
Expand Down Expand Up @@ -134,6 +145,11 @@ def prepare_bvcc(
)
)
ood1_dev_recs = ood1_recs.filter(lambda rec: rec.id in ood1_dev_sup)

# Fix manifests
ood1_dev_recs, ood1_dev_sup = fix_manifests(ood1_dev_recs, ood1_dev_sup)
validate_recordings_and_supervisions(ood1_dev_recs, ood1_dev_sup)

manifests["ood1_dev"] = {
"recordings": ood1_dev_recs,
"supervisions": ood1_dev_sup,
Expand All @@ -148,6 +164,11 @@ def prepare_bvcc(
)
)
ood1_train_recs = ood1_recs.filter(lambda rec: rec.id in ood1_train_sup)

# Fix manifests
ood1_train_recs, ood1_train_sup = fix_manifests(ood1_train_recs, ood1_train_sup)
validate_recordings_and_supervisions(ood1_train_recs, ood1_train_sup)

manifests["ood1_train"] = {
"recordings": ood1_train_recs,
"supervisions": ood1_train_sup,
Expand Down
6 changes: 2 additions & 4 deletions lhotse/recipes/cmu_arctic.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import remove_missing_recordings_and_supervisions
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download, safe_extract

BASE_URL = "http://festvox.org/cmu_arctic/packed/"
Expand Down Expand Up @@ -167,9 +167,7 @@ def prepare_cmu_arctic(
supervisions = SupervisionSet.from_segments(supervisions)

# There seem to be 20 recordings missing; remove them before validation
recordings, supervisions = remove_missing_recordings_and_supervisions(
recordings, supervisions
)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
6 changes: 2 additions & 4 deletions lhotse/recipes/cmu_indic.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
SupervisionSet,
validate_recordings_and_supervisions,
)
from lhotse.qa import remove_missing_recordings_and_supervisions
from lhotse.qa import fix_manifests
from lhotse.utils import Pathlike, resumable_download, safe_extract

BASE_URL = "http://festvox.org/h2r_indic/"
Expand Down Expand Up @@ -194,9 +194,7 @@ def prepare_cmu_indic(
supervisions = SupervisionSet.from_segments(supervisions)

# There seem to be 20 recordings missing; remove them before validation
recordings, supervisions = remove_missing_recordings_and_supervisions(
recordings, supervisions
)
recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

if output_dir is not None:
Expand Down
18 changes: 10 additions & 8 deletions lhotse/recipes/cmu_kids.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@

Summary of corpus from LDC webpage:

This database is comprised of sentences read aloud by children. It was originally designed
in order to create a training set of children's speech for the SPHINX II automatic speech
This database is comprised of sentences read aloud by children. It was originally designed
in order to create a training set of children's speech for the SPHINX II automatic speech
recognizer for its use in the LISTEN project at Carnegie Mellon University.

The children range in age from six to eleven (see details below) and were in first through
third grades (the 11-year-old was in 6th grade) at the time of recording. There were 24 male
The children range in age from six to eleven (see details below) and were in first through
third grades (the 11-year-old was in 6th grade) at the time of recording. There were 24 male
and 52 female speakers. There are 5,180 utterances in all.

The speakers come from two separate populations:

1. SIM95: They were recorded in the summer of 1995 and were enrolled in either the Chatham
College Summer Camp or the Mount Lebanon Extended Day Summer Fun program in Pittsburgh.
1. SIM95: They were recorded in the summer of 1995 and were enrolled in either the Chatham
College Summer Camp or the Mount Lebanon Extended Day Summer Fun program in Pittsburgh.
They were recorded on-site. There are 44 speakers and 3,333 utterances in this set. They
are "good" reading examples.
2. FP: These are examples of errorful reading and dialectic variants. The readers come from
Fort Pitt School in Pittsburgh and were recorded in April 1996. There are 32 speakers and
2. FP: These are examples of errorful reading and dialectic variants. The readers come from
Fort Pitt School in Pittsburgh and were recorded in April 1996. There are 32 speakers and
1,847 utterances in this set.

The user should be aware that the speakers' dialect partly reflects what is locally called "Pittsburghese."
Expand All @@ -36,6 +36,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike

Expand Down Expand Up @@ -129,6 +130,7 @@ def prepare_cmu_kids(
recordings = RecordingSet.from_recordings(recordings)
supervisions = SupervisionSet.from_segments(supervisions)

recordings, supervisions = fix_manifests(recordings, supervisions)
validate_recordings_and_supervisions(recordings, supervisions)

manifests = {
Expand Down
7 changes: 7 additions & 0 deletions lhotse/recipes/commonvoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
validate_recordings_and_supervisions,
)
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike, is_module_available, resumable_download, safe_extract

Expand Down Expand Up @@ -305,6 +306,12 @@ def prepare_commonvoice(
num_jobs=num_jobs,
)

# Fix manifests
recording_set, supervision_set = fix_manifests(
recording_set, supervision_set
)
validate_recordings_and_supervisions(recording_set, supervision_set)

supervision_set.to_file(
output_dir / f"cv-{lang}_supervisions_{part}.jsonl.gz"
)
Expand Down
5 changes: 5 additions & 0 deletions lhotse/recipes/csj.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@

from lhotse import validate_recordings_and_supervisions
from lhotse.audio import Recording, RecordingSet
from lhotse.qa import fix_manifests
from lhotse.recipes.utils import manifests_exist, read_manifests_if_cached
from lhotse.supervision import SupervisionSegment, SupervisionSet
from lhotse.utils import Pathlike
Expand Down Expand Up @@ -889,6 +890,10 @@ def prepare_manifests(

recording_set = RecordingSet.from_recordings(recordings)
supervision_set = SupervisionSet.from_segments(supervisions)

recording_set, supervision_set = fix_manifests(
recording_set, supervision_set
)
validate_recordings_and_supervisions(recording_set, supervision_set)

if manifest_dir:
Expand Down
Loading