sgl-project · zhyncs · Apr 2, 2025 · Mar 30, 2025 · Mar 30, 2025 · Mar 30, 2025
@@ -152,8 +152,9 @@ def generate(
         Please refer to `GenerateReqInput` for the documentation.
         """
         modalities_list = []
-        if image_data is not None:
-            modalities_list.append("image")
+        for item_image_data in image_data:
+            if item_image_data is not None:
+                modalities_list.append("image")
 
         obj = GenerateReqInput(
             text=prompt,

@@ -230,7 +230,16 @@ def load_mm_data(
                             continue
 
                     image_sizes += frames[0].size * len(frames)
-                    hashes += [hash(image_file)] * len(frames)
+
+                    # Generate a hashable value for the image file
+                    if isinstance(image_file, Image.Image):
+                        # For PIL.Image objects, hash the object's id() (its identity, not its pixel content)
+                        hash_value = hash(id(image_file))
+                    else:
+                        # For other types (strings, etc.), use the regular hash
+                        hash_value = hash(image_file)
+
+                    hashes += [hash_value] * len(frames)
                     images += frames
                     image_index += 1
                     if frames_to_process != 0:

@@ -51,17 +51,19 @@ def _process_images_task(images, input_text, _hf_config):
         }
 
     async def _process_single_image(self, images, input_text) -> dict:
-        if self.executor is not None:
-            loop = asyncio.get_event_loop()
-            return await loop.run_in_executor(
-                self.executor,
-                Qwen2_5VLImageProcessor._process_images_task,
-                images,
-                input_text,
-                self.hf_config,
-            )
-        else:
-            return self._process_images_task(images, input_text, self.hf_config)
+        # if self.executor is not None:
+        #     loop = asyncio.get_event_loop()
+        #     # FIXME(review): executor offload is commented out, so the task now runs inline via the direct call below; the reason is not recorded here - confirm before re-enabling
+        #     return await loop.run_in_executor(
+        #         self.executor,
+        #         Qwen2_5VLImageProcessor._process_images_task,
+        #         images,
+        #         input_text,
+        #         self.hf_config,
+        #     )
+        # else:
+        #     return self._process_images_task(images, input_text, self.hf_config)
+        return self._process_images_task(images, input_text, self.hf_config)
 
     async def process_mm_data_async(
         self,
@@ -149,6 +151,9 @@ def floor_by_factor(number: int, factor: int) -> int:
         ret = await self._process_single_image(
             images=images, input_text=base_output.input_text
         )
+
+        # DEBUG: temporary diagnostic print of `ret` below; remove before merging
+        print(f"!!!!!! ret: {type(ret)}, {ret["image_grid_thw"]}")
 
         image_grid_thws = torch.concat([ret["image_grid_thw"]])
         video_grid_thws = None

diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py
@@ -537,10 +537,12 @@ def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarra
     return audio
 
 
-def load_image(image_file: Union[str, bytes]) -> tuple[Image, tuple[int, int]]:
+def load_image(image_file: Union[Image.Image, str, bytes]) -> tuple[Image.Image, tuple[int, int]]:
     image = image_size = None
-
-    if isinstance(image_file, bytes):
+    if isinstance(image_file, Image.Image):
+        image = image_file
+        image_size = (image.width, image.height)
+    elif isinstance(image_file, bytes):
         image = Image.open(BytesIO(image_file))
     elif image_file.startswith("http://") or image_file.startswith("https://"):
         timeout = int(os.getenv("REQUEST_TIMEOUT", "3"))