5 changes: 1 addition & 4 deletions docs/models/supported_models.md
@@ -404,10 +404,7 @@ Specified using `--task embed`.
     You should manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`.
 
 !!! note
-    The HF implementation of `Alibaba-NLP/gte-Qwen2-1.5B-instruct` is hardcoded to use causal attention despite what is shown in `config.json`. To compare vLLM vs HF results,
-    you should set `--hf-overrides '{"is_causal": true}'` in vLLM so that the two implementations are consistent with each other.
-
-    For both the 1.5B and 7B variants, you also need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
+    For `Alibaba-NLP/gte-Qwen2-*`, you need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
     See [relevant issue on HF Transformers](https://github.com/huggingface/transformers/issues/34882).
 
 !!! note
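For context, the two CLI flags quoted in the note above have offline counterparts in vLLM's Python `LLM` entry point. The snippet below is a minimal sketch, not part of this diff; the keyword names, the `task="embed"` value, and the `embed()` helper are assumed from a recent vLLM release.

```python
from vllm import LLM
from vllm.config import PoolerConfig

# Offline equivalent of --override-pooler-config '{"pooling_type": "MEAN"}',
# only needed for models whose pooling config is missing or incorrect.
mean_pooling = PoolerConfig(pooling_type="MEAN")

llm = LLM(
    model="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
    task="embed",            # --task embed
    trust_remote_code=True,  # --trust-remote-code, so the correct tokenizer is loaded
    # override_pooler_config=mean_pooling,  # uncomment only if mean pooling is required
)

outputs = llm.embed(["vLLM is a fast inference engine."])
print(len(outputs[0].outputs.embedding))  # embedding dimensionality
```

The mean-pooling override is left commented out here because the gte-Qwen2 models use their own default pooling; it is shown only to illustrate how the documented flag maps onto the Python API.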
6 changes: 1 addition & 5 deletions tests/models/language/pooling/test_embedding.py
@@ -15,13 +15,12 @@
                  marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
     pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
     pytest.param("intfloat/multilingual-e5-small"),
-    pytest.param("Alibaba-NLP/gte-Qwen2-7B-instruct"),
+    pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
     # [Decoder-only]
     pytest.param("BAAI/bge-multilingual-gemma2",
                  marks=[pytest.mark.core_model]),
     pytest.param("intfloat/e5-mistral-7b-instruct",
                  marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
-    pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
     pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
     # [Cross-Encoder]
     pytest.param("sentence-transformers/stsb-roberta-base-v2"),
@@ -47,9 +46,6 @@ def test_models(
         vllm_extra_kwargs["override_pooler_config"] = \
             PoolerConfig(pooling_type="MEAN")
 
-    if model == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
-        vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
-
     # The example_prompts has ending "\n", for example:
     # "Write a short story about a robot that dreams for the first time.\n"
     # sentence_transformers will strip the input texts, see:
6 changes: 0 additions & 6 deletions tests/models/language/pooling/test_gte.py
@@ -61,9 +61,6 @@ def test_models_mteb(hf_runner, vllm_runner,
     from .mteb_utils import mteb_test_embed_models
 
     vllm_extra_kwargs: dict[str, Any] = {}
-    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
-        vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
-
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
 
@@ -81,9 +78,6 @@ def test_models_correctness(hf_runner, vllm_runner, model_info: EmbedModelInfo,
     example_prompts = [str(s).strip() for s in example_prompts]
 
     vllm_extra_kwargs: dict[str, Any] = {}
-    if model_info.name == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
-        vllm_extra_kwargs["hf_overrides"] = {"is_causal": True}
-
     if model_info.architecture == "GteNewModel":
         vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}
 