2 changes: 1 addition & 1 deletion test/srt/run_suite.py
@@ -67,7 +67,7 @@ class TestFile:
TestFile("test_update_weights_from_disk.py", 114),
TestFile("test_update_weights_from_tensor.py", 48),
TestFile("test_vertex_endpoint.py", 31),
TestFile("test_vision_chunked_prefill.py", 223),
TestFile("test_vision_chunked_prefill.py", 99),
TestFile("test_vlm_accuracy.py", 60),
TestFile("test_vision_openai_server.py", 537),
TestFile("test_fim_completion.py", 40),
61 changes: 35 additions & 26 deletions test/srt/test_vision_chunked_prefill.py
@@ -24,6 +24,7 @@


class TestVisionChunkedPrefill(CustomTestCase):

    def prepare_video_messages(self, video_path, max_frames_num=8):
        # We import decord here to avoid a strange Segmentation fault (core dumped) issue.
        # The following import order will cause Segmentation fault.
@@ -128,7 +129,7 @@ def generate_for_video(self, batch, num_frame) -> Union[str, list[str]]:

        return responses

    def run_generate(self, chunked_prefill_size, batch, num_frame):
    def launch_server(self, chunked_prefill_size) -> int:
        # launch server
        model = "lmms-lab/llava-onevision-qwen2-7b-ov"
        # model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
@@ -142,37 +143,45 @@ def run_generate(self, chunked_prefill_size, batch, num_frame):
f"{chunked_prefill_size}",
],
)
return process.pid

    def _test_chunked_prefill(self, batches, num_frames):
        # Chunked
        try:
            return self.generate_for_video(batch, num_frame)
            chunked_server_pid = self.launch_server(chunked_prefill_size=1024)
            outputs_chunked = []
            for batch, num_frame in zip(batches, num_frames):
                output_chunked = self.generate_for_video(
                    batch=batch, num_frame=num_frame
                )
                outputs_chunked += [output_chunked]
        finally:
            kill_process_tree(process.pid)
            kill_process_tree(chunked_server_pid)

    def test_chunked_prefill(self):
        output_chunked = self.run_generate(
            chunked_prefill_size=1024, batch=False, num_frame=1
        )
        output_no_chunked = self.run_generate(
            chunked_prefill_size=-1, batch=False, num_frame=1
        )
        # None-chunked
        try:
            no_chunked_server_pid = self.launch_server(chunked_prefill_size=-1)
            outputs_no_chunked = []
            for batch, num_frame in zip(batches, num_frames):
                output_no_chunked = self.generate_for_video(
                    batch=batch, num_frame=num_frame
                )
                outputs_no_chunked += [output_no_chunked]

        print("output with chunked prefill:")
        print(output_chunked)
        print("output without chunked prefill:")
        print(output_no_chunked)
        assert output_chunked == output_no_chunked
        finally:
            kill_process_tree(no_chunked_server_pid)

        output_chunked = self.run_generate(
            chunked_prefill_size=1024, batch=True, num_frame=[2, 6, 8, 10]
        )
        output_no_chunked = self.run_generate(
            chunked_prefill_size=-1, batch=True, num_frame=[2, 6, 8, 10]
        )
        for output_chunked, output_no_chunked in zip(
            outputs_chunked, outputs_no_chunked
        ):
            print("output with chunked prefill:")
            print(output_chunked)
            print("output without chunked prefill:")
            print(output_no_chunked)
            assert output_chunked == output_no_chunked

        print("output with chunked prefill:")
        print(output_chunked)
        print("output without chunked prefill:")
        print(output_no_chunked)
        assert output_chunked == output_no_chunked
    def test_chunked_prefill(self):
        self._test_chunked_prefill(batches=[False, True], num_frames=[1, [2, 6, 8, 10]])


if __name__ == "__main__":
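
Because the diff view interleaves removed and added lines, the refactored test is easier to follow as a consolidated sketch. The sketch below is reconstructed only from the visible hunks; the body of launch_server (the collapsed popen_launch_server call), the generate_for_video helper, the file's imports, and the debug prints before the assert are elided or assumed, so treat it as an illustration rather than the exact file contents.

class TestVisionChunkedPrefill(CustomTestCase):

    def launch_server(self, chunked_prefill_size) -> int:
        # Start a server with the given --chunked-prefill-size and return its
        # pid so the caller can tear it down (launch details are collapsed in the diff).
        ...

    def _test_chunked_prefill(self, batches, num_frames):
        # Chunked: run every (batch, num_frame) case against one server
        # launched with chunked_prefill_size=1024.
        try:
            chunked_server_pid = self.launch_server(chunked_prefill_size=1024)
            outputs_chunked = []
            for batch, num_frame in zip(batches, num_frames):
                outputs_chunked += [
                    self.generate_for_video(batch=batch, num_frame=num_frame)
                ]
        finally:
            kill_process_tree(chunked_server_pid)

        # Non-chunked: repeat the same cases with chunked prefill disabled (-1).
        try:
            no_chunked_server_pid = self.launch_server(chunked_prefill_size=-1)
            outputs_no_chunked = []
            for batch, num_frame in zip(batches, num_frames):
                outputs_no_chunked += [
                    self.generate_for_video(batch=batch, num_frame=num_frame)
                ]
        finally:
            kill_process_tree(no_chunked_server_pid)

        # Both configurations must produce identical outputs for every case.
        for output_chunked, output_no_chunked in zip(outputs_chunked, outputs_no_chunked):
            assert output_chunked == output_no_chunked

    def test_chunked_prefill(self):
        self._test_chunked_prefill(
            batches=[False, True], num_frames=[1, [2, 6, 8, 10]]
        )

The net effect of the refactor is that the test now launches one server per chunked-prefill setting (two in total) and reuses it for both the single-request and batched cases, instead of launching a fresh server for each of the four run_generate calls in the old version; that is consistent with the per-file number for this test in run_suite.py dropping from 223 to 99.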