Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

ASR HUGGINGFACE_BACKBONES use AutoModel #874

Merged
9 commits merged on Nov 24, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions flash/audio/speech_recognition/backbone.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
SPEECH_RECOGNITION_BACKBONES = FlashRegistry("backbones")

if _AUDIO_AVAILABLE:
- from transformers import Wav2Vec2ForCTC
+ from transformers import AutoModelForCTC, Wav2Vec2ForCTC

WAV2VEC_MODELS = ["facebook/wav2vec2-base-960h", "facebook/wav2vec2-large-960h-lv60"]

@@ -31,6 +31,6 @@
providers=[_HUGGINGFACE, _FAIRSEQ],
)

- HUGGINGFACE_BACKBONES = ExternalRegistry(Wav2Vec2ForCTC.from_pretrained, "backbones", providers=_HUGGINGFACE)
+ HUGGINGFACE_BACKBONES = ExternalRegistry(AutoModelForCTC.from_pretrained, "backbones", providers=_HUGGINGFACE)

SPEECH_RECOGNITION_BACKBONES += HUGGINGFACE_BACKBONES
2 changes: 1 addition & 1 deletion requirements/datatype_audio.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
torchaudio
librosa>=0.8.1
- transformers>=4.5
+ transformers>=4.11.0
datasets>=1.8