Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

ASR HUGGINGFACE_BACKBONES use AutoModel #874

Merged
merged 9 commits into from
Nov 24, 2021
Next commit
ASR HUGGINGFACE_BACKBONES use AutoModel
flozi00 authored Oct 15, 2021
commit 48d5900eaaa9699c194928b28529302f599922da
4 changes: 2 additions & 2 deletions flash/audio/speech_recognition/backbone.py
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
SPEECH_RECOGNITION_BACKBONES = FlashRegistry("backbones")

if _AUDIO_AVAILABLE:
- from transformers import Wav2Vec2ForCTC
+ from transformers import AutoModelForCTC, Wav2Vec2ForCTC

WAV2VEC_MODELS = ["facebook/wav2vec2-base-960h", "facebook/wav2vec2-large-960h-lv60"]

@@ -31,6 +31,6 @@
providers=[_HUGGINGFACE, _FAIRSEQ],
)

- HUGGINGFACE_BACKBONES = ExternalRegistry(Wav2Vec2ForCTC.from_pretrained, "backbones", providers=_HUGGINGFACE)
+ HUGGINGFACE_BACKBONES = ExternalRegistry(AutoModelForCTC.from_pretrained, "backbones", providers=_HUGGINGFACE)

SPEECH_RECOGNITION_BACKBONES += HUGGINGFACE_BACKBONES