From 80917b356619f7bfeafa4879966c4103f3d4cca2 Mon Sep 17 00:00:00 2001
From: liusy58 <liusy58@linux.alibaba.com>
Date: Mon, 8 Dec 2025 23:49:33 +0800
Subject: [PATCH] Add docs for SGLANG_MM_BUFFER_SIZE_MB

---
 docs/references/environment_variables.md     | 1 +
 python/sglang/srt/managers/schedule_batch.py | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/docs/references/environment_variables.md b/docs/references/environment_variables.md
index 67e3f92955a3..f2051f8f5044 100644
--- a/docs/references/environment_variables.md
+++ b/docs/references/environment_variables.md
@@ -36,6 +36,7 @@ SGLang supports various environment variables that can be used to configure its
 | `SGLANG_SCHEDULER_RECV_SKIPPER_WEIGHT_DECODE` | Weight increment for decode forward mode in scheduler recv skipper. Works with `--scheduler-recv-interval` to control polling frequency during decode phase. | `1` |
 | `SGLANG_SCHEDULER_RECV_SKIPPER_WEIGHT_VERIFY` | Weight increment for target verify forward mode in scheduler recv skipper. Works with `--scheduler-recv-interval` to control polling frequency during verification phase. | `1` |
 | `SGLANG_SCHEDULER_RECV_SKIPPER_WEIGHT_NONE` | Weight increment when forward mode is None in scheduler recv skipper. Works with `--scheduler-recv-interval` to control polling frequency when no specific forward mode is active. | `1` |
+| `SGLANG_MM_BUFFER_SIZE_MB` | Size (in MB) of the preallocated GPU buffer used to speed up multi-modal feature hashing. When set to a positive value, features are temporarily moved to the GPU for faster hash computation and then moved back to the CPU to save GPU memory. Larger features benefit more from GPU hashing. Set to `0` to disable. | `0` |
 
 
 ## DeepGEMM Configuration (Advanced Optimization)
diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index f712fe0164e4..a1ab8b23b0ca 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -327,6 +327,9 @@ def from_dict(obj: dict):
         ret.mm_items = [item for item in ret.mm_items if item.is_valid()]
 
         if envs.SGLANG_MM_BUFFER_SIZE_MB.get() > 0:
+            # Multi-modal feature hashing optimization:
+            # when SGLANG_MM_BUFFER_SIZE_MB > 0, feature tensors are temporarily moved to the
+            # GPU for faster hash computation, using a preallocated feature buffer to avoid OOM.
             from sglang.srt.managers.mm_utils import (
                 init_feature_buffer,
                 is_feature_buffer_initialized,