2 changes: 0 additions & 2 deletions docs/source/en/internal/generation_utils.md
@@ -140,8 +140,6 @@ generation.
[[autodoc]] ForcedEOSTokenLogitsProcessor
- __call__

[[autodoc]] ForceTokensLogitsProcessor
- __call__

[[autodoc]] HammingDiversityLogitsProcessor
- __call__
3 changes: 0 additions & 3 deletions docs/source/ja/internal/generation_utils.md
@@ -139,9 +139,6 @@ generation_output[:2]
[[autodoc]] ForcedEOSTokenLogitsProcessor
- __call__

[[autodoc]] ForceTokensLogitsProcessor
- __call__

[[autodoc]] HammingDiversityLogitsProcessor
- __call__

3 changes: 0 additions & 3 deletions docs/source/zh/internal/generation_utils.md
@@ -133,9 +133,6 @@ generation_output[:2]
[[autodoc]] ForcedEOSTokenLogitsProcessor
- __call__

[[autodoc]] ForceTokensLogitsProcessor
- __call__

[[autodoc]] HammingDiversityLogitsProcessor
- __call__

2 changes: 0 additions & 2 deletions src/transformers/__init__.py
@@ -1401,7 +1401,6 @@
"ExponentialDecayLengthPenalty",
"ForcedBOSTokenLogitsProcessor",
"ForcedEOSTokenLogitsProcessor",
"ForceTokensLogitsProcessor",
"GenerationMixin",
"HammingDiversityLogitsProcessor",
"InfNanRemoveLogitsProcessor",
@@ -6264,7 +6263,6 @@
ExponentialDecayLengthPenalty,
ForcedBOSTokenLogitsProcessor,
ForcedEOSTokenLogitsProcessor,
ForceTokensLogitsProcessor,
GenerationMixin,
HammingDiversityLogitsProcessor,
InfNanRemoveLogitsProcessor,
144 changes: 0 additions & 144 deletions src/transformers/audio_utils.py
@@ -679,147 +679,3 @@ def amplitude_to_db(

return spectrogram


### deprecated functions below this line ###


def get_mel_filter_banks(
nb_frequency_bins: int,
nb_mel_filters: int,
frequency_min: float,
frequency_max: float,
sample_rate: int,
norm: Optional[str] = None,
mel_scale: str = "htk",
) -> np.array:
warnings.warn(
"The function `get_mel_filter_banks` is deprecated and will be removed in version 4.31.0 of Transformers",
FutureWarning,
)
return mel_filter_bank(
num_frequency_bins=nb_frequency_bins,
num_mel_filters=nb_mel_filters,
min_frequency=frequency_min,
max_frequency=frequency_max,
sampling_rate=sample_rate,
norm=norm,
mel_scale=mel_scale,
)


def fram_wave(waveform: np.array, hop_length: int = 160, fft_window_size: int = 400, center: bool = True):
"""
In order to compute the short-time Fourier transform, the waveform needs to be split into overlapping windowed
segments called `frames`.

The window length (`fft_window_size`) defines how much of the signal is contained in each frame, while the hop
length defines the step between the beginning of each new frame.


Args:
waveform (`np.array` of shape `(sample_length,)`):
The raw waveform which will be split into smaller chunks.
hop_length (`int`, *optional*, defaults to 160):
Step between each window of the waveform.
fft_window_size (`int`, *optional*, defaults to 400):
Defines the size of the window.
center (`bool`, defaults to `True`):
Whether or not to center each frame on its reference sample of the waveform (one every `hop_length` samples).
Centering is done by reflecting the waveform on the left and on the right.

Return:
framed_waveform (`np.array` of shape `(waveform.shape // hop_length , fft_window_size)`):
The framed waveforms that can be fed to `np.fft`.
"""
warnings.warn(
"The function `fram_wave` is deprecated and will be removed in version 4.31.0 of Transformers",
FutureWarning,
)
frames = []
for i in range(0, waveform.shape[0] + 1, hop_length):
if center:
half_window = (fft_window_size - 1) // 2 + 1
start = i - half_window if i > half_window else 0
end = i + half_window if i < waveform.shape[0] - half_window else waveform.shape[0]
frame = waveform[start:end]
if start == 0:
padd_width = (-i + half_window, 0)
frame = np.pad(frame, pad_width=padd_width, mode="reflect")

elif end == waveform.shape[0]:
padd_width = (0, (i - waveform.shape[0] + half_window))
frame = np.pad(frame, pad_width=padd_width, mode="reflect")

else:
frame = waveform[i : i + fft_window_size]
frame_width = frame.shape[0]
if frame_width < waveform.shape[0]:
frame = np.lib.pad(
frame, pad_width=(0, fft_window_size - frame_width), mode="constant", constant_values=0
)
frames.append(frame)

frames = np.stack(frames, 0)
return frames


def stft(frames: np.array, windowing_function: np.array, fft_window_size: int = None):
"""
Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal. Should give the same results
as `torch.stft`.

Args:
frames (`np.array` of dimension `(num_frames, fft_window_size)`):
A framed audio signal obtained using `audio_utils.fram_wave`.
windowing_function (`np.array` of dimension `(nb_frequency_bins, nb_mel_filters)`):
An array representing the function used to reduce the amplitude of the discontinuities at the
boundaries of each frame when computing the STFT. Each frame will be multiplied by the windowing_function.
For more information on the discontinuities, called *spectral leakage*, refer to [this
tutorial](https://download.ni.com/evaluation/pxi/Understanding%20FFTs%20and%20Windowing.pdf).
fft_window_size (`int`, *optional*):
Size of the window on which the Fourier transform is applied. This controls the frequency resolution of the
spectrogram. 400 means that the Fourier transform is computed on windows of 400 samples. The number of
frequency bins (`nb_frequency_bins`) used to divide the window into equal strips is equal to
`(1 + fft_window_size) // 2`. Increasing `fft_window_size` slows the computation proportionally.

Example:

```python
>>> from transformers.audio_utils import stft, fram_wave
>>> import numpy as np

>>> audio = np.random.rand(50)
>>> fft_window_size = 10
>>> hop_length = 2
>>> framed_audio = fram_wave(audio, hop_length, fft_window_size)
>>> spectrogram = stft(framed_audio, np.hanning(fft_window_size + 1))
```

Returns:
spectrogram (`np.ndarray`):
A spectrogram of shape `(nb_frequency_bins, num_frames)` obtained using the STFT algorithm.
"""
warnings.warn(
"The function `stft` is deprecated and will be removed in version 4.31.0 of Transformers",
FutureWarning,
)
frame_size = frames.shape[1]

if fft_window_size is None:
fft_window_size = frame_size

if fft_window_size < frame_size:
raise ValueError("FFT size must be greater than or equal to the frame size")
# number of FFT bins to store
nb_frequency_bins = (fft_window_size >> 1) + 1

spectrogram = np.empty((len(frames), nb_frequency_bins), dtype=np.complex64)
fft_signal = np.zeros(fft_window_size)

for f, frame in enumerate(frames):
if windowing_function is not None:
np.multiply(frame, windowing_function, out=fft_signal[:frame_size])
else:
fft_signal[:frame_size] = frame
spectrogram[f] = np.fft.fft(fft_signal, axis=0)[:nb_frequency_bins]
return spectrogram.T
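For readers migrating off these deprecated wrappers, below is a minimal sketch of the replacement path in `transformers.audio_utils`. Only `mel_filter_bank` is named explicitly in the deprecation message above; the `window_function` and `spectrogram` calls are an assumption about the current helpers that cover what `fram_wave` and `stft` used to do, so treat the exact arguments as illustrative rather than a verbatim migration guide.

```python
import numpy as np
from transformers.audio_utils import mel_filter_bank, spectrogram, window_function

waveform = np.random.rand(16000).astype(np.float32)  # 1 s of dummy audio at 16 kHz

# Replacement for the removed `get_mel_filter_banks` wrapper, using the new argument names.
mel_filters = mel_filter_bank(
    num_frequency_bins=201,  # (400 // 2) + 1 bins for a 400-sample FFT window
    num_mel_filters=80,
    min_frequency=0.0,
    max_frequency=8000.0,
    sampling_rate=16000,
    norm=None,
    mel_scale="htk",
)

# `spectrogram` frames the waveform, applies the window and runs the FFT in one call,
# covering what the removed `fram_wave` + `stft` pair did.
spec = spectrogram(
    waveform,
    window=window_function(400, "hann"),
    frame_length=400,
    hop_length=160,
    fft_length=400,
    mel_filters=mel_filters,
)
print(spec.shape)  # mel spectrogram, (num_mel_filters, num_frames)
```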
40 changes: 0 additions & 40 deletions src/transformers/deepspeed.py

This file was deleted.

1 change: 0 additions & 1 deletion src/transformers/file_utils.py
@@ -115,7 +115,6 @@
is_timm_available,
is_tokenizers_available,
is_torch_available,
is_torch_bf16_available,
is_torch_cuda_available,
is_torch_fx_available,
is_torch_fx_proxy,
2 changes: 0 additions & 2 deletions src/transformers/generation/__init__.py
@@ -55,7 +55,6 @@
"ExponentialDecayLengthPenalty",
"ForcedBOSTokenLogitsProcessor",
"ForcedEOSTokenLogitsProcessor",
"ForceTokensLogitsProcessor",
"HammingDiversityLogitsProcessor",
"InfNanRemoveLogitsProcessor",
"LogitNormalization",
@@ -192,7 +191,6 @@
ExponentialDecayLengthPenalty,
ForcedBOSTokenLogitsProcessor,
ForcedEOSTokenLogitsProcessor,
ForceTokensLogitsProcessor,
HammingDiversityLogitsProcessor,
InfNanRemoveLogitsProcessor,
LogitNormalization,
56 changes: 0 additions & 56 deletions src/transformers/generation/logits_process.py
@@ -1708,62 +1708,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
return scores


class ForceTokensLogitsProcessor(LogitsProcessor):
r"""
This processor takes a list of pairs of integers which indicates a mapping from generation indices to token
indices that will be forced before generation. At each mapped index, the processor sets the score of every other
token to `-inf` and the forced token's score to 0, so that only the forced token can be sampled. Originally
created for [Whisper](https://huggingface.co/docs/transformers/model_doc/whisper).

Examples:
```python
>>> from transformers import AutoProcessor, WhisperForConditionalGeneration
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")

>>> # This Whisper model forces the generation to start with `50362` at the first position by default, i.e.
>>> # `"forced_decoder_ids": [[1, 50362]]`. This means all other tokens are masked out.
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True)
>>> print(
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
... )
True
>>> print(outputs.scores[0][0, 50362])
tensor(0.)

>>> # If we disable `forced_decoder_ids`, we stop seeing that effect
>>> outputs = model.generate(**inputs, return_dict_in_generate=True, output_scores=True, forced_decoder_ids=None)
>>> print(
... all(outputs.scores[0][0, i] == float("-inf") for i in range(processor.tokenizer.vocab_size) if i != 50362)
... )
False
>>> print(outputs.scores[0][0, 50362])
tensor(19.3140)
```
"""

def __init__(self, force_token_map: List[List[int]], _has_warned: Optional[bool] = False):
self.force_token_map = dict(force_token_map)
if not _has_warned:
# TODO(Sanchit): remove this processor entirely in v4.40
warnings.warn(
"This `ForceTokensLogitsProcessor` has been deprecated and will be removed in v4.40. Should you need to provide prompt ids for generation, specify `input_ids` to the generate method for decoder-only models, or `decoder_input_ids` for encoder-decoder models.",
FutureWarning,
)

@add_start_docstrings(LOGITS_PROCESSOR_INPUTS_DOCSTRING)
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
generation_idx = input_ids.shape[-1]
current_token = self.force_token_map.get(generation_idx, None)
if current_token is not None:
scores[:, :] = -float("inf")
scores[:, current_token] = 0
return scores


class WhisperTimeStampLogitsProcessor(LogitsProcessor):
r"""
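Since the class is removed outright, here is a self-contained sketch of the masking its `__call__` performed, for anyone who needs to reproduce the behaviour; the `force_tokens` helper is hypothetical and only mirrors the deleted logic above.

```python
import torch


def force_tokens(scores: torch.FloatTensor, input_ids: torch.LongTensor, force_token_map: dict) -> torch.FloatTensor:
    # Mirrors the removed ForceTokensLogitsProcessor.__call__: if the current
    # generation index has a forced token, mask every other token out.
    generation_idx = input_ids.shape[-1]
    current_token = force_token_map.get(generation_idx, None)
    if current_token is not None:
        scores[:, :] = -float("inf")
        scores[:, current_token] = 0
    return scores


scores = torch.randn(1, 8)
scores = force_tokens(scores, torch.tensor([[42]]), {1: 5})  # force token id 5 at generation index 1
print(scores)  # every entry is -inf except scores[0, 5] == 0
```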
30 changes: 0 additions & 30 deletions src/transformers/generation/stopping_criteria.py
@@ -76,36 +76,6 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> torch.BoolTensor:
return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool)


class MaxNewTokensCriteria(StoppingCriteria):
"""
This class can be used to stop generation whenever the number of generated tokens exceeds `max_new_tokens`. Keep in
mind that for decoder-only transformers, this count does **not** include the initial prompt tokens. This is very
close to `MaxLengthCriteria` but ignores the number of initial tokens.

Args:
start_length (`int`):
The number of initial tokens.
max_new_tokens (`int`):
The maximum number of tokens to generate.
"""

def __init__(self, start_length: int, max_new_tokens: int):
warnings.warn(
"The class `MaxNewTokensCriteria` is deprecated. "
f"Please use `MaxLengthCriteria(max_length={start_length + max_new_tokens})` "
"with `max_length = start_length + max_new_tokens` instead.",
FutureWarning,
)
self.start_length = start_length
self.max_new_tokens = max_new_tokens
self.max_length = start_length + max_new_tokens

@add_start_docstrings(STOPPING_CRITERIA_INPUTS_DOCSTRING)
def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> torch.BoolTensor:
is_done = input_ids.shape[-1] >= self.max_length
return torch.full((input_ids.shape[0],), is_done, device=input_ids.device, dtype=torch.bool)


class MaxTimeCriteria(StoppingCriteria):
"""
This class can be used to stop generation whenever the full generation exceeds some amount of time. By default, the
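The warning above spells out the replacement; the sketch below shows that `MaxLengthCriteria(max_length=start_length + max_new_tokens)` behaves as a drop-in for the removed class. The lengths are placeholder values, and the expected outputs rely on `MaxLengthCriteria` returning a per-batch boolean tensor exactly like the `__call__` shown in this hunk.

```python
import torch
from transformers import MaxLengthCriteria

start_length = 5  # number of initial (prompt) tokens, placeholder value
max_new_tokens = 3

# Drop-in equivalent of the removed MaxNewTokensCriteria(start_length, max_new_tokens).
criteria = MaxLengthCriteria(max_length=start_length + max_new_tokens)

scores = torch.zeros((1, 10))
in_progress = torch.zeros((1, start_length + 2), dtype=torch.long)
finished = torch.zeros((1, start_length + max_new_tokens), dtype=torch.long)

print(criteria(in_progress, scores))  # tensor([False]) -- fewer than max_new_tokens generated
print(criteria(finished, scores))     # tensor([True])  -- start_length + max_new_tokens reached
```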
8 changes: 0 additions & 8 deletions src/transformers/generation/utils.py
@@ -54,7 +54,6 @@
ExponentialDecayLengthPenalty,
ForcedBOSTokenLogitsProcessor,
ForcedEOSTokenLogitsProcessor,
ForceTokensLogitsProcessor,
HammingDiversityLogitsProcessor,
InfNanRemoveLogitsProcessor,
LogitNormalization,
@@ -864,13 +863,6 @@ def _get_logits_processor(
processors.append(
SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, begin_index)
)
if generation_config.forced_decoder_ids is not None:
# TODO(Sanchit): deprecate in v4.40 by removing this logic
warnings.warn(
"You have explicitly specified `forced_decoder_ids`. This functionality has been deprecated and will throw an error in v4.40. Please remove the `forced_decoder_ids` argument in favour of `input_ids` or `decoder_input_ids` respectively.",
FutureWarning,
)
processors.append(ForceTokensLogitsProcessor(generation_config.forced_decoder_ids, _has_warned=True))
processors = self._merge_criteria_processor_list(processors, logits_processor)
# `LogitNormalization` should always be the last logit processor, when present
if generation_config.renormalize_logits is True:
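A hedged migration sketch based on the wording of the removed warning: instead of `forced_decoder_ids`, the prompt tokens are passed to `generate` as `decoder_input_ids` (the encoder-decoder case). The Whisper checkpoint and token id `50362` are borrowed from the `ForceTokensLogitsProcessor` docstring above; depending on the checkpoint's generation config, additional adjustments may be needed.

```python
import torch
from datasets import load_dataset
from transformers import AutoProcessor, WhisperForConditionalGeneration

processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")

# Before: model.generate(**inputs, forced_decoder_ids=[[1, 50362]])
# After: supply the forced prompt tokens directly as decoder_input_ids.
decoder_input_ids = torch.tensor([[model.config.decoder_start_token_id, 50362]])
outputs = model.generate(**inputs, decoder_input_ids=decoder_input_ids)
print(processor.batch_decode(outputs, skip_special_tokens=True))
```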