Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMI beamformed mic option #1048

Merged
merged 2 commits into from
May 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions lhotse/bin/modes/recipes/ami.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
)
@click.option(
"--mic",
type=click.Choice(["ihm", "ihm-mix", "sdm", "mdm"], case_sensitive=False),
type=click.Choice(
["ihm", "ihm-mix", "sdm", "mdm", "mdm8-bf"], case_sensitive=False
),
default="ihm",
help="AMI microphone setting.",
)
Expand Down Expand Up @@ -82,7 +84,9 @@ def ami(
)
@click.option(
"--mic",
type=click.Choice(["ihm", "ihm-mix", "sdm", "mdm"], case_sensitive=False),
type=click.Choice(
["ihm", "ihm-mix", "sdm", "mdm", "mdm8-bf"], case_sensitive=False
),
default="ihm",
help="AMI microphone setting.",
)
Expand Down
53 changes: 34 additions & 19 deletions lhotse/recipes/ami.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@
}
}

MICS = ['ihm','ihm-mix','sdm','mdm']
MICS = ['ihm','ihm-mix','sdm','mdm','mdm8-bf']
MDM_ARRAYS = ['Array1','Array2']
MDM_CHANNELS = ['01','02','03','04','05','06','07','08']
# fmt: on
Expand All @@ -176,12 +176,11 @@ def download_audio(
wav_dir = target_dir / "wav_db" / item / "audio"
wav_dir.mkdir(parents=True, exist_ok=True)
wav_path = wav_dir / wav_name
if force_download or not wav_path.is_file():
resumable_download(
wav_url,
filename=wav_path,
desc=f"Downloading {wav_name}",
)
resumable_download(
wav_url,
filename=wav_path,
force_download=force_download,
)
elif mic == "ihm-mix":
wav_name = f"{item}.Mix-Headset.wav"
wav_url = f"{url}/AMICorpusMirror/amicorpus/{item}/audio/{wav_name}"
Expand Down Expand Up @@ -211,6 +210,15 @@ def download_audio(
resumable_download(
wav_url, filename=wav_path, force_download=force_download
)
elif mic == "mdm8-bf":
wav_name = f"{item}_MDM8.wav"
wav_url = f"{url}/AMICorpusMirror/amicorpus/beamformed/{item}/{wav_name}"
wav_dir = target_dir / "wav_db" / item / "audio"
wav_dir.mkdir(parents=True, exist_ok=True)
wav_path = wav_dir / wav_name
resumable_download(
wav_url, filename=wav_path, force_download=force_download
)


def download_ami(
Expand All @@ -233,7 +241,7 @@ def download_ami(
:param annotations: Pathlike (default = None), path to save annotations zip file
:param force_download: bool (default = False), if True, download even if file is present.
:param url: str (default = 'http://groups.inf.ed.ac.uk/ami'), AMI download URL.
:param mic: str {'ihm','ihm-mix','sdm','mdm'}, type of mic setting.
:param mic: str {'ihm','ihm-mix','sdm','mdm','mdm8-bf'}, type of mic setting.
:return: the path to downloaded and extracted directory with data.
"""
target_dir = Path(target_dir)
Expand Down Expand Up @@ -468,17 +476,20 @@ def prepare_audio_grouped(
return RecordingSet.from_recordings(recordings)


# SDM and IHM-Mix settings do not require any grouping
# SDM, IHM-Mix, and mdm8-bf settings do not require any grouping


def prepare_audio_single(
audio_paths: List[Pathlike],
mic: Optional[str] = "ihm-mix",
) -> RecordingSet:
import soundfile as sf

recordings = []
for audio_path in tqdm(audio_paths, desc="Processing audio files"):
session_name = audio_path.parts[-3]
session_name = (
audio_path.parts[-3] if mic != "mdm8-bf" else audio_path.parts[-2]
)
audio_sf = sf.SoundFile(str(audio_path))
recordings.append(
Recording(
Expand Down Expand Up @@ -563,7 +574,7 @@ def prepare_supervision_other(
segments = []
for recording in tqdm(audio, desc="Preparing supervisions"):
annotation = annotation_by_id.get(recording.id)
# In these mic settings, all sources (1 for ihm-mix and sdm and 16 for mdm)
# In these mic settings, all sources (1 for ihm-mix, sdm, and mdm8-bf and 16 for mdm)
# will share supervision.
if annotation is None:
logging.warning(f"No annotation found for recording {recording.id}")
Expand Down Expand Up @@ -609,7 +620,7 @@ def prepare_ami(
:param data_dir: Pathlike, the path of the data dir.
:param annotations: Pathlike, the path of the annotations dir or zip file.
:param output_dir: Pathlike, the path where to write the manifests.
:param mic: str {'ihm','ihm-mix','sdm','mdm'}, type of mic to use.
:param mic: str {'ihm','ihm-mix','sdm','mdm','mdm8-bf'}, type of mic to use.
:param partition: str {'full-corpus','full-corpus-asr','scenario-only'}, AMI official data split
:param normalize_text: str {'none', 'upper', 'kaldi'} normalization of text
:param max_words_per_segment: int, maximum number of words per segment. If not None, we will split
Expand Down Expand Up @@ -643,6 +654,9 @@ def prepare_ami(
"No annotations directory specified and no zip file found in"
f" {data_dir}"
)
else:
annotations_dir = Path(annotations_dir)

# Prepare annotations which is a list of segment-level transcriptions
annotations = parse_ami_annotations(
annotations_dir,
Expand All @@ -661,13 +675,14 @@ def prepare_ami(
else wav_dir.rglob("*Array?-0?.wav")
)
audio = prepare_audio_grouped(list(audio_paths))
elif mic in ["ihm-mix", "sdm"]:
audio_paths = (
wav_dir.rglob("*Mix-Headset.wav")
if mic == "ihm-mix"
else wav_dir.rglob("*Array1-01.wav")
)
audio = prepare_audio_single(list(audio_paths))
elif mic in ["ihm-mix", "sdm", "mdm8-bf"]:
if mic == "ihm-mix":
audio_paths = wav_dir.rglob("*Mix-Headset.wav")
elif mic == "sdm":
audio_paths = wav_dir.rglob("*Array1-01.wav")
elif mic == "mdm8-bf":
audio_paths = wav_dir.rglob("*MDM8.wav")
audio = prepare_audio_single(list(audio_paths), mic)

# Supervisions
logging.info("Preparing supervision manifests")
Expand Down