Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ def generate(
Please refer to `GenerateReqInput` for the documentation.
"""
modalities_list = []
if image_data is not None:
modalities_list.append("image")
for item_image_data in image_data:
if item_image_data is not None:
modalities_list.append("image")

obj = GenerateReqInput(
text=prompt,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,16 @@ def load_mm_data(
continue

image_sizes += frames[0].size * len(frames)
hashes += [hash(image_file)] * len(frames)

# Generate a hashable value for the image file
if isinstance(image_file, Image.Image):
# For PIL.Image objects, use the ID as a hashable value
hash_value = hash(id(image_file))
else:
# For other types (strings, etc.), use the regular hash
hash_value = hash(image_file)

hashes += [hash_value] * len(frames)
images += frames
image_index += 1
if frames_to_process != 0:
Expand Down
27 changes: 16 additions & 11 deletions python/sglang/srt/managers/multimodal_processors/qwen_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,19 @@ def _process_images_task(images, input_text, _hf_config):
}

async def _process_single_image(self, images, input_text) -> dict:
if self.executor is not None:
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
self.executor,
Qwen2_5VLImageProcessor._process_images_task,
images,
input_text,
self.hf_config,
)
else:
return self._process_images_task(images, input_text, self.hf_config)
# if self.executor is not None:
# loop = asyncio.get_event_loop()
# # FIXME
# return await loop.run_in_executor(
# self.executor,
# Qwen2_5VLImageProcessor._process_images_task,
# images,
# input_text,
# self.hf_config,
# )
# else:
# return self._process_images_task(images, input_text, self.hf_config)
return self._process_images_task(images, input_text, self.hf_config)

async def process_mm_data_async(
self,
Expand Down Expand Up @@ -149,6 +151,9 @@ def floor_by_factor(number: int, factor: int) -> int:
ret = await self._process_single_image(
images=images, input_text=base_output.input_text
)

#debug
print(f"!!!!!! ret: {type(ret)}, {ret["image_grid_thw"]}")

image_grid_thws = torch.concat([ret["image_grid_thw"]])
video_grid_thws = None
Expand Down
8 changes: 5 additions & 3 deletions python/sglang/srt/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,10 +537,12 @@ def load_audio(audio_file: str, sr: int = 16000, mono: bool = True) -> np.ndarra
return audio


def load_image(image_file: Union[str, bytes]) -> tuple[Image, tuple[int, int]]:
def load_image(image_file: Union[Image.Image, str, bytes]) -> tuple[Image.Image, tuple[int, int]]:
image = image_size = None

if isinstance(image_file, bytes):
if isinstance(image_file, Image.Image):
image = image_file
image_size = (image.width, image.height)
elif isinstance(image_file, bytes):
image = Image.open(BytesIO(image_file))
elif image_file.startswith("http://") or image_file.startswith("https://"):
timeout = int(os.getenv("REQUEST_TIMEOUT", "3"))
Expand Down
Loading