From e9441297f7238d30a1435ecde277011a662f83e0 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sat, 11 Apr 2026 02:10:27 +0800 Subject: [PATCH 1/2] remove resampy deps Signed-off-by: Isotr0py --- requirements/rocm-test.in | 1 - requirements/rocm-test.txt | 4 ---- requirements/test.in | 1 - requirements/test.txt | 4 ---- setup.py | 1 - vllm/multimodal/audio.py | 20 +------------------- vllm/multimodal/media/audio.py | 9 ++------- 7 files changed, 3 insertions(+), 37 deletions(-) diff --git a/requirements/rocm-test.in b/requirements/rocm-test.in index 23c3a0f91e0c..f936d32fc09b 100644 --- a/requirements/rocm-test.in +++ b/requirements/rocm-test.in @@ -23,7 +23,6 @@ vocos # required for minicpmo_26 test peft>=0.15.0 # required for phi-4-mm test pqdm ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests -resampy # required for audio tests sentence-transformers>=5.2.0 # required for embedding tests soundfile # required for audio tests jiwer # required for audio tests diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 36484fd20cb1..0bb0f7e858ce 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -663,7 +663,6 @@ numba==0.61.2 # -c requirements/rocm.txt # -r requirements/rocm-test.in # librosa - # resampy numkong==7.1.1 # via albucore numpy==2.2.6 @@ -709,7 +708,6 @@ numpy==2.2.6 # pytrec-eval-terrier # pywavelets # rasterio - # resampy # rioxarray # rouge-score # runai-model-streamer @@ -1196,8 +1194,6 @@ requests==2.32.5 # tiktoken # transformers # wandb -resampy==0.4.3 - # via -r requirements/rocm-test.in responses==0.26.0 # via genai-perf rfc3339-validator==0.1.4 diff --git a/requirements/test.in b/requirements/test.in index 378ecf94222e..19b9674f161e 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -21,7 +21,6 @@ vocos # required for minicpmo_26 test peft>=0.15.0 # required for phi-4-mm test pqdm ray[cgraph,default]>=2.48.0 # Ray Compiled Graph, required by pipeline parallelism tests -resampy # required for audio tests sentence-transformers>=5.2.0 # required for embedding tests soundfile # required for audio tests jiwer # required for audio tests diff --git a/requirements/test.txt b/requirements/test.txt index 5675a2a829a1..bb60b273341c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -552,7 +552,6 @@ numba==0.61.2 # via # -r requirements/test.in # librosa - # resampy numpy==2.2.6 # via # -r requirements/test.in @@ -593,7 +592,6 @@ numpy==2.2.6 # pyogrio # pywavelets # rasterio - # resampy # rioxarray # rouge-score # runai-model-streamer @@ -1014,8 +1012,6 @@ requests==2.32.3 # tiktoken # transformers # wandb -resampy==0.4.3 - # via -r requirements/test.in responses==0.25.3 # via genai-perf rfc3339-validator==0.1.4 diff --git a/setup.py b/setup.py index 1748781985f2..65b07de5f37c 100644 --- a/setup.py +++ b/setup.py @@ -1083,7 +1083,6 @@ def _read_requirements(filename: str) -> list[str]: "runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"], "audio": [ "av", - "resampy", "scipy", "soundfile", "mistral_common[audio]", diff --git a/vllm/multimodal/audio.py b/vllm/multimodal/audio.py index 0a748a6d15c6..c232bb9ea6a5 100644 --- a/vllm/multimodal/audio.py +++ b/vllm/multimodal/audio.py @@ -16,11 +16,6 @@ except ImportError: av = PlaceholderModule("av") # type: ignore[assignment] -try: - import resampy -except ImportError: - resampy = PlaceholderModule("resampy") # type: ignore[assignment] - try: import scipy.signal as scipy_signal except ImportError: @@ -229,15 +224,6 @@ def resample_audio_pyav( return result[:expected_len] -def resample_audio_resampy( - audio: npt.NDArray[np.floating], - *, - orig_sr: float, - target_sr: float, -) -> npt.NDArray[np.floating]: - return resampy.resample(audio, sr_orig=orig_sr, sr_new=target_sr) - - def resample_audio_scipy( audio: npt.NDArray[np.floating], *, @@ -257,7 +243,7 @@ class AudioResampler: def __init__( self, target_sr: float | None = None, - method: Literal["pyav", "resampy", "scipy"] = "resampy", + method: Literal["pyav", "scipy"] = "pyav", ): self.target_sr = target_sr self.method = method @@ -281,10 +267,6 @@ def resample( return audio if self.method == "pyav": return resample_audio_pyav(audio, orig_sr=orig_sr, target_sr=self.target_sr) - if self.method == "resampy": - return resample_audio_resampy( - audio, orig_sr=orig_sr, target_sr=self.target_sr - ) elif self.method == "scipy": return resample_audio_scipy( audio, orig_sr=orig_sr, target_sr=self.target_sr diff --git a/vllm/multimodal/media/audio.py b/vllm/multimodal/media/audio.py index 47c2743bb99a..f5f709196b84 100644 --- a/vllm/multimodal/media/audio.py +++ b/vllm/multimodal/media/audio.py @@ -9,6 +9,7 @@ import pybase64 import torch +from vllm.multimodal.audio import resample_audio_pyav from vllm.utils.import_utils import PlaceholderModule from vllm.utils.serial_utils import tensor2base64 @@ -25,12 +26,6 @@ soundfile = PlaceholderModule("soundfile") # type: ignore[assignment] -try: - import resampy -except ImportError: - resampy = PlaceholderModule("resampy") # type: ignore[assignment] - - # Public libsndfile error codes exposed via `soundfile.LibsndfileError.code`, soundfile # being librosa's main backend. Used to validate if an audio loading error is due to a # server error vs a client error (invalid audio file). @@ -126,7 +121,7 @@ def load_audio_soundfile( y = np.mean(y, axis=tuple(range(y.ndim - 1))) if sr is not None and sr != native_sr: - y = resampy.resample(y, sr_orig=native_sr, sr_new=sr) + y = resample_audio_pyav(y, orig_sr=native_sr, target_sr=sr) return y, int(sr) return y, native_sr From 3e02741eceb762855a4e2219cdcc3051d2b6374e Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Wed, 15 Apr 2026 00:40:51 +0800 Subject: [PATCH 2/2] pre-commit Signed-off-by: Isotr0py --- vllm/multimodal/media/audio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/multimodal/media/audio.py b/vllm/multimodal/media/audio.py index a460d419f5ec..26d58d13a731 100644 --- a/vllm/multimodal/media/audio.py +++ b/vllm/multimodal/media/audio.py @@ -9,8 +9,8 @@ import pybase64 import torch -from vllm.multimodal.audio import resample_audio_pyav from vllm.logger import init_logger +from vllm.multimodal.audio import resample_audio_pyav from vllm.utils.import_utils import PlaceholderModule from vllm.utils.serial_utils import tensor2base64