Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/sglang/srt/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ class Envs:
SGLANG_VLM_CACHE_SIZE_MB = EnvInt(100)
SGLANG_IMAGE_MAX_PIXELS = EnvInt(16384 * 28 * 28)
SGLANG_RESIZE_RESAMPLE = EnvStr("")
SGLANG_USE_OPENCV_VIDEO_BACKEND = EnvBool(False)
SGLANG_MM_BUFFER_SIZE_MB = EnvInt(0)
SGLANG_MM_PRECOMPUTE_HASH = EnvBool(False)
SGLANG_VIT_ENABLE_CUDA_GRAPH = EnvBool(False)
Expand Down
16 changes: 14 additions & 2 deletions python/sglang/srt/multimodal/processors/qwen_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
BaseMultimodalProcessor as SGLangBaseProcessor,
)
from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens
from sglang.srt.utils import read_video_frames_opencv
from sglang.utils import logger

IMAGE_FACTOR = 28
Expand Down Expand Up @@ -150,13 +151,24 @@ async def preprocess_video(
) -> torch.Tensor:
entry_time = time.perf_counter()

total_frames, video_fps = len(vr), vr.get_avg_fps()
if envs.SGLANG_USE_OPENCV_VIDEO_BACKEND.get():
import cv2

total_frames, video_fps = int(vr.get(cv2.CAP_PROP_FRAME_COUNT)), vr.get(
cv2.CAP_PROP_FPS
)
else:
total_frames, video_fps = len(vr), vr.get_avg_fps()

nframes = smart_nframes(
video_config, total_frames=total_frames, video_fps=video_fps
)
idx = np.linspace(0, total_frames - 1, num=nframes, dtype=np.int64)
idx = np.unique(idx)
video_np = vr.get_batch(idx).asnumpy()
if envs.SGLANG_USE_OPENCV_VIDEO_BACKEND.get():
video_np = read_video_frames_opencv(vr, idx)
else:
video_np = vr.get_batch(idx).asnumpy()
video = torch.from_numpy(video_np).pin_memory()
video = video.permute(0, 3, 1, 2) # Convert to TCHW format

Expand Down
93 changes: 93 additions & 0 deletions python/sglang/srt/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,13 @@ def get_image_bytes(image_file: Union[str, bytes]):


def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
    """Open a video and return a backend-specific reader handle.

    Dispatches to the OpenCV backend when the SGLANG_USE_OPENCV_VIDEO_BACKEND
    environment flag is set; otherwise falls back to the default decord
    backend (which honors ``use_gpu``).
    """
    if envs.SGLANG_USE_OPENCV_VIDEO_BACKEND.get():
        return get_video_opencv_handler(video_file)
    return get_video_decord_handler(video_file, use_gpu)


def get_video_decord_handler(video_file: Union[str, bytes], use_gpu: bool = True):
# We import decord here to avoid a strange Segmentation fault (core dumped) issue.
from decord import VideoReader, cpu, gpu

Expand Down Expand Up @@ -1010,6 +1017,92 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
os.unlink(tmp_file.name)


def get_video_opencv_handler(video_file: Union[str, bytes]):
    """Open *video_file* with OpenCV and return a ``cv2.VideoCapture``.

    Mirrors the input handling of the decord handler:
      - raw ``bytes`` of an encoded video,
      - an http(s) URL (downloaded with REQUEST_TIMEOUT, default 10s),
      - a ``data:`` URI with a base64 payload,
      - a local / ``file://`` path,
      - a bare base64-encoded string.

    Non-path inputs are spooled to a temporary file, which is unlinked in the
    ``finally`` block once ``VideoCapture`` has opened it (the capture keeps
    its own OS handle on POSIX — NOTE(review): this may fail on Windows,
    same as the pre-existing decord path; confirm if Windows is supported).

    Raises:
        ValueError: if *video_file* is neither ``str`` nor ``bytes``.
    """
    import cv2

    tmp_file = None
    vc = None
    try:
        if isinstance(video_file, bytes):
            tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
            tmp_file.write(video_file)
            tmp_file.close()
            vc = cv2.VideoCapture(tmp_file.name)
        elif isinstance(video_file, str):
            if video_file.startswith(("http://", "https://")):
                timeout = int(os.getenv("REQUEST_TIMEOUT", "10"))
                response = requests.get(video_file, stream=True, timeout=timeout)
                response.raise_for_status()
                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
                for chunk in response.iter_content(chunk_size=8192):
                    tmp_file.write(chunk)
                tmp_file.close()
                vc = cv2.VideoCapture(tmp_file.name)
            elif video_file.startswith("data:"):
                _, encoded = video_file.split(",", 1)
                video_bytes = pybase64.b64decode(encoded, validate=True)
                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
                tmp_file.write(video_bytes)
                tmp_file.close()
                vc = cv2.VideoCapture(tmp_file.name)
            # `urlparse` supports file:// paths, and so does VideoCapture
            elif os.path.isfile(urlparse(video_file).path):
                # Bug fix: open the actual file path. `tmp_file` is None in
                # this branch, so the old `cv2.VideoCapture(tmp_file.name)`
                # raised AttributeError for every local-path input.
                vc = cv2.VideoCapture(urlparse(video_file).path)
            else:
                video_bytes = pybase64.b64decode(video_file, validate=True)
                tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
                tmp_file.write(video_bytes)
                tmp_file.close()
                vc = cv2.VideoCapture(tmp_file.name)
        else:
            raise ValueError(f"Unsupported video input type: {type(video_file)}")

        return vc

    finally:
        if tmp_file and os.path.exists(tmp_file.name):
            os.unlink(tmp_file.name)


def read_video_frames_opencv(vc, frame_idx: List[int]):
    """Decode the frames listed in *frame_idx* from an OpenCV capture.

    Sequentially grabs frames up to the largest requested index, decoding
    (via ``retrieve``) only the requested ones — cheaper than seeking or
    decoding every frame. Frames are converted BGR -> RGB and returned as a
    uint8 array of shape (n, H, W, 3); unreadable frames are skipped with a
    warning, so fewer than ``len(frame_idx)`` rows may be returned. The
    capture is always released.
    """
    import cv2

    try:
        width = int(vc.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vc.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(vc.get(cv2.CAP_PROP_FRAME_COUNT))
        n_frames = len(frame_idx)

        video_np = np.empty((n_frames, height, width, 3), dtype=np.uint8)
        wanted = set(frame_idx)
        # Never grab past the reported frame count or the last wanted index.
        stop = min(total_frames, max(frame_idx) + 1)
        filled = 0
        for pos in range(stop):
            if not vc.grab():
                if pos in wanted:
                    logger.warning(
                        f"Failed to read frame {pos}, skipped. The video may be corrupted."
                    )
                continue
            if pos not in wanted:
                continue
            ok, frame = vc.retrieve()
            if not ok:
                logger.warning(
                    f"Failed to retrieve frame {pos}, skipped. The video may be corrupted."
                )
                continue
            video_np[filled] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            filled += 1
        if filled != n_frames:
            logger.warning(
                f"Expected {n_frames} frames, but only got {filled}. The video may be corrupted."
            )
        return video_np[:filled]
    finally:
        vc.release()


def sample_video_frames(
video: "VideoReader", *, desired_fps: int, max_frames: int
) -> list[int]:
Expand Down
Loading