Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions tests/models/multimodal/generation/test_keye.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,8 @@ class ModelRequestData(NamedTuple):
sampling_params: SamplingParams | None = None


@pytest.mark.core_model
@pytest.mark.parametrize("question", [QUESTION])
def test_keye_vl(
image_assets,
question: str,
):
def test_keye_vl(image_assets, question: str):
images = [asset.pil_image for asset in image_assets]
image_urls = [encode_image_url(image) for image in images]

Expand Down
55 changes: 44 additions & 11 deletions tests/models/multimodal/generation/test_nemotron_parse.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,53 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Sequence
from collections.abc import Iterable, Sequence

import pytest
import regex as re
from transformers import AutoModel

from tests.models.utils import check_logprobs_close
from vllm.assets.image import ImageAsset
from vllm.logprobs import Logprob, SampleLogprobs
from vllm.tokenizers import TokenizerLike

from ....conftest import HfRunner, PromptImageInput, VllmRunner
from ....utils import create_new_process_for_each_test

IMAGE = ImageAsset("paper-11").pil_image_ext(ext="png").convert("RGB")
PROMPT = "</s><s><predict_bbox><predict_classes><output_markdown>"


class DummyLogprobs(dict[int, Logprob]):
    """Logprob mapping that treats every vocab id as an acceptable token.

    With all vocab ids present (at logprob 0.0), a logprob-closeness check
    will accept any sampled token at that position, effectively masking it.
    """

    def __init__(self, vocab_ids: Iterable[int]):
        # Fill this dict subclass directly instead of building an
        # intermediate dict via dict.fromkeys() only for dict.__init__ to
        # copy it — that doubles the allocation over the whole vocab.
        # A single shared Logprob(0.0) instance is intentional (same as
        # dict.fromkeys would produce).
        placeholder = Logprob(0.0)
        super().__init__()
        self.update((vocab_id, placeholder) for vocab_id in vocab_ids)

    def __repr__(self):
        # Compact repr: dumping the full-vocab mapping would flood
        # test-failure output.
        return "DummyLogprobs()"


def mask_bbox_tokens(
output: tuple[list[int], str, SampleLogprobs],
tokenizer: TokenizerLike,
) -> tuple[list[int], str, SampleLogprobs]:
"""
Always pass check_logprobs_close check for bounding box tokens
because it is reasonable for them to differ slightly.
"""
ignore_pattern = r"<[xy]_[\d.]+>"
vocab = tokenizer.get_vocab()

output_ids, output_str, out_logprobs = output

masked_logprobs = list[dict[int, Logprob]]()
for token, logprobs in zip(output_ids, out_logprobs):
if re.match(ignore_pattern, tokenizer.decode(token)):
masked_logprobs.append(DummyLogprobs(vocab.values()))
else:
masked_logprobs.append(logprobs)

return output_ids, output_str, masked_logprobs


def run_test(
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
Expand Down Expand Up @@ -44,6 +76,8 @@ def run_test(
for prompts, images in inputs
]

tokenizer = vllm_model.llm.get_tokenizer()

with hf_runner(model, dtype=dtype, auto_cls=AutoModel) as hf_model:
hf_outputs_per_case = [
hf_model.generate_greedy_logprobs_limit(
Expand All @@ -58,29 +92,28 @@ def run_test(

for hf_outputs, vllm_outputs in zip(hf_outputs_per_case, vllm_outputs_per_case):
check_logprobs_close(
outputs_0_lst=hf_outputs,
outputs_1_lst=vllm_outputs,
outputs_0_lst=[
mask_bbox_tokens(output, tokenizer) for output in hf_outputs
],
outputs_1_lst=[
mask_bbox_tokens(output, tokenizer) for output in vllm_outputs
],
name_0="hf",
name_1="vllm",
)


@pytest.mark.core_model
@pytest.mark.parametrize("model", ["nvidia/NVIDIA-Nemotron-Parse-v1.1"])
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("num_logprobs", [5])
@create_new_process_for_each_test("spawn")
def test_models(
hf_runner, vllm_runner, model: str, dtype: str, num_logprobs: int
) -> None:
run_test(
hf_runner,
vllm_runner,
inputs=[
(
[PROMPT] * 10,
[IMAGE] * 10,
),
([PROMPT] * 10, [IMAGE] * 10),
],
model=model,
dtype=dtype,
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_terratorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from tests.utils import create_new_process_for_each_test


@create_new_process_for_each_test() # Memory is not cleaned up properly otherwise
@create_new_process_for_each_test() # Hangs otherwise
@pytest.mark.parametrize(
"model",
[
Expand Down
5 changes: 3 additions & 2 deletions vllm/model_executor/models/nemotron_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,8 +319,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
(".self_attn.qkv_proj", ".self_attn.q_proj", "q"),
(".self_attn.qkv_proj", ".self_attn.k_proj", "k"),
(".self_attn.qkv_proj", ".self_attn.v_proj", "v"),
(".encoder_attn.kv_proj", ".encoder_attn.k_proj", "k"),
(".encoder_attn.kv_proj", ".encoder_attn.v_proj", "v"),
# MergedColumnParallelLinear uses integer indices (0, 1)
(".encoder_attn.kv_proj", ".encoder_attn.k_proj", 0),
(".encoder_attn.kv_proj", ".encoder_attn.v_proj", 1),
]
params_dict = dict(self.named_parameters())
loaded_params: set[str] = set()
Expand Down
Loading