Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion requirements/test/cuda.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ vocos # required for minicpmo_26 test
peft>=0.18.1 # required for phi-4-mm test
pqdm
ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
resampy # required for audio tests
sentence-transformers>=5.2.0 # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
Expand Down
4 changes: 0 additions & 4 deletions requirements/test/cuda.txt
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,6 @@ numba==0.61.2
# -c requirements/cuda.txt
# -r requirements/test/cuda.in
# librosa
# resampy
numpy==2.2.6
# via
# -r requirements/test/cuda.in
Expand Down Expand Up @@ -596,7 +595,6 @@ numpy==2.2.6
# pyogrio
# pywavelets
# rasterio
# resampy
# rioxarray
# rouge-score
# runai-model-streamer
Expand Down Expand Up @@ -1015,8 +1013,6 @@ requests==2.32.3
# tacoreader
# tiktoken
# wandb
resampy==0.4.3
# via -r requirements/test/cuda.in
responses==0.25.3
# via genai-perf
rfc3339-validator==0.1.4
Expand Down
1 change: 0 additions & 1 deletion requirements/test/rocm.in
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ vocos # required for minicpmo_26 test
peft>=0.15.0 # required for phi-4-mm test
pqdm
ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests
resampy # required for audio tests
sentence-transformers>=5.2.0 # required for embedding tests
soundfile # required for audio tests
jiwer # required for audio tests
Expand Down
4 changes: 0 additions & 4 deletions requirements/test/rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,6 @@ numba==0.61.2
# -c requirements/rocm.txt
# -r requirements/test/rocm.in
# librosa
# resampy
numkong==7.1.1
# via albucore
numpy==2.2.6
Expand Down Expand Up @@ -708,7 +707,6 @@ numpy==2.2.6
# pytrec-eval-terrier
# pywavelets
# rasterio
# resampy
# rioxarray
# rouge-score
# runai-model-streamer
Expand Down Expand Up @@ -1193,8 +1191,6 @@ requests==2.32.5
# tacoreader
# tiktoken
# wandb
resampy==0.4.3
# via -r requirements/test/rocm.in
responses==0.26.0
# via genai-perf
rfc3339-validator==0.1.4
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,6 @@ def _read_requirements(filename: str) -> list[str]:
"runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
"audio": [
"av",
"resampy",
"scipy",
"soundfile",
"mistral_common[audio]",
Expand Down
20 changes: 1 addition & 19 deletions vllm/multimodal/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@
except ImportError:
av = PlaceholderModule("av") # type: ignore[assignment]

try:
import resampy
except ImportError:
resampy = PlaceholderModule("resampy") # type: ignore[assignment]

try:
import scipy.signal as scipy_signal
except ImportError:
Expand Down Expand Up @@ -229,15 +224,6 @@ def resample_audio_pyav(
return result[:expected_len]


def resample_audio_resampy(
    audio: npt.NDArray[np.floating],
    *,
    orig_sr: float,
    target_sr: float,
) -> npt.NDArray[np.floating]:
    """Resample ``audio`` from ``orig_sr`` to ``target_sr`` via resampy.

    Thin wrapper around ``resampy.resample`` that maps this module's
    keyword names (``orig_sr`` / ``target_sr``) onto resampy's
    ``sr_orig`` / ``sr_new`` arguments.

    Args:
        audio: Input samples, passed to resampy unchanged.
        orig_sr: Sampling rate of ``audio``.
        target_sr: Sampling rate to convert to.

    Returns:
        Whatever ``resampy.resample`` returns for the given arguments.
    """
    resampled = resampy.resample(
        audio,
        sr_orig=orig_sr,
        sr_new=target_sr,
    )
    return resampled


def resample_audio_scipy(
audio: npt.NDArray[np.floating],
*,
Expand All @@ -257,7 +243,7 @@ class AudioResampler:
def __init__(
self,
target_sr: float | None = None,
method: Literal["pyav", "resampy", "scipy"] = "resampy",
method: Literal["pyav", "scipy"] = "pyav",
):
self.target_sr = target_sr
self.method = method
Expand All @@ -281,10 +267,6 @@ def resample(
return audio
if self.method == "pyav":
return resample_audio_pyav(audio, orig_sr=orig_sr, target_sr=self.target_sr)
if self.method == "resampy":
return resample_audio_resampy(
audio, orig_sr=orig_sr, target_sr=self.target_sr
)
elif self.method == "scipy":
return resample_audio_scipy(
audio, orig_sr=orig_sr, target_sr=self.target_sr
Expand Down
9 changes: 2 additions & 7 deletions vllm/multimodal/media/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import torch

from vllm.logger import init_logger
from vllm.multimodal.audio import resample_audio_pyav
from vllm.utils.import_utils import PlaceholderModule
from vllm.utils.serial_utils import tensor2base64

Expand All @@ -28,12 +29,6 @@
soundfile = PlaceholderModule("soundfile") # type: ignore[assignment]


try:
import resampy
except ImportError:
resampy = PlaceholderModule("resampy") # type: ignore[assignment]


# Public libsndfile error codes exposed via `soundfile.LibsndfileError.code`, soundfile
# being librosa's main backend. Used to validate if an audio loading error is due to a
# server error vs a client error (invalid audio file).
Expand Down Expand Up @@ -129,7 +124,7 @@ def load_audio_soundfile(
y = np.mean(y, axis=tuple(range(y.ndim - 1)))

if sr is not None and sr != native_sr:
y = resampy.resample(y, sr_orig=native_sr, sr_new=sr)
y = resample_audio_pyav(y, orig_sr=native_sr, target_sr=sr)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

In `load_audio_soundfile`, the resampling step is now hardcoded to `resample_audio_pyav`. If the `av` package is not installed (e.g., because the user did not install `vllm[audio]`), any call that requires resampling will raise an error from the `PlaceholderModule`. It would be safer to use the `AudioResampler` class, or to fall back to `scipy` when `av` is unavailable, especially since `load_audio` is a general-purpose utility.

return y, int(sr)
return y, native_sr

Expand Down
Loading