Skip to content

Commit

Permalink
[text] uncomment
Browse files Browse the repository at this point in the history
  • Loading branch information
Mddct committed Nov 27, 2023
1 parent 49994bf commit 301af9e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 1 addition & 1 deletion wenet/dataset/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from torch.nn.utils.rnn import pad_sequence
from wenet.text.base_tokenizer import BaseTokenizer

- # torchaudio.utils.sox_utils.set_buffer_size(16500)
+ torchaudio.utils.sox_utils.set_buffer_size(16500)

AUDIO_FORMAT_SETS = set(['flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma'])

Expand Down
5 changes: 3 additions & 2 deletions wenet/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
import torch
from torch.nn.utils.rnn import pad_sequence

+ from whisper.tokenizer import LANGUAGES as WhiserLanguages
+
+ WHISPER_LANGS = tuple(WhiserLanguages.keys())
IGNORE_ID = -1


Expand Down Expand Up @@ -173,8 +176,6 @@ def add_whisper_tokens(
ys_out (torch.Tensor) : (B, Lmax + ?)
"""
- from whisper.tokenizer import LANGUAGES as WhiserLanguages
- WHISPER_LANGS = tuple(WhiserLanguages.keys())
if use_prev:
# i.e., hotword list
_prev = [special_tokens["sot_prev"]]
Expand Down

0 comments on commit 301af9e

Please sign in to comment.