14 changes: 7 additions & 7 deletions examples/pooling/score/convert_model_to_seq_cls.py
@@ -108,7 +108,7 @@ def no_post_processing(causal_lm, seq_cls_model, tokenizer, tokens, device):


def converting(
-     model_name, classifier_from_tokens, path, method, use_pad_token=False, device="cpu"
+     model_name, classifier_from_tokens, path, method, use_sep_token=False, device="cpu"
):
"""
Main conversion function to transform a CausalLM model to SequenceClassification.
Expand All @@ -118,7 +118,7 @@ def converting(
classifier_from_tokens: List of tokens used for classification
path: Output path to save the converted model
method: Conversion method ('from_2_way_softmax' or 'no_post_processing')
-     use_pad_token: Whether to use padding token in the sequence classification model
+     use_sep_token: Whether to use separating token in the sequence classification model
device: Device to load the model on ('cpu' or 'cuda')
"""
assert method in method_map, f"Unknown method: {method}"
@@ -149,10 +149,10 @@ def converting(
causal_lm, seq_cls_model, tokenizer, classifier_from_tokens, device
)

-     # Configure padding token settings
-     # Note: Reranker models typically don't use padding tokens by default
-     seq_cls_model.config.use_pad_token = use_pad_token
-     seq_cls_model.config.pad_token_id = tokenizer.pad_token_id
+     # Configure separating token settings
+     # Note: `llm as reranker` defaults to not using separating token.
+     seq_cls_model.config.use_sep_token = use_sep_token
+     seq_cls_model.config.sep_token_id = tokenizer.sep_token_id

# Save the converted model and tokenizer
seq_cls_model.save_pretrained(path)
@@ -203,6 +203,6 @@ def parse_args():
model_name=args.model_name,
classifier_from_tokens=json.loads(args.classifier_from_tokens),
method=args.method,
-         use_pad_token=args.use_pad_token,
+         use_sep_token=args.use_sep_token,
Review comment: the name in add_argument is still use-pad-token.

path=args.path,
)
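The review comment above points at a remaining mismatch: the call site now passes args.use_sep_token, but if parse_args() still registers the flag as --use-pad-token, argparse only creates args.use_pad_token and the new attribute access fails. A minimal sketch of that behaviour, assuming the flag is a plain argparse boolean switch (the exact definition in the script may differ):

```python
import argparse

parser = argparse.ArgumentParser()
# argparse maps "--use-sep-token" to the attribute name "use_sep_token";
# with the old "--use-pad-token" flag only "use_pad_token" exists, so the
# renamed call site would raise AttributeError until the flag is renamed too.
parser.add_argument("--use-sep-token", action="store_true")

args = parser.parse_args(["--use-sep-token"])
assert args.use_sep_token is True
```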
16 changes: 8 additions & 8 deletions tests/entrypoints/pooling/score/test_utils.py
@@ -51,9 +51,9 @@ def llm_reranker_model_config():
CROSS_ENCODER_MODEL_ID,
runner="pooling",
)
-     # use_pad_token is a property that reads from hf_config,
+     # use_sep_token is a property that reads from hf_config,
# so we set it there to override the default (True)
-     config.hf_config.use_pad_token = False
+     config.hf_config.use_sep_token = False
return config


@@ -230,15 +230,15 @@ def test_not_using_default_template(
cross_encoder_tokenizer, full_prompt, engine_prompt
)

- def test_fallback_with_pad_token(
+ def test_fallback_with_sep_token(
self,
cross_encoder_model_config,
cross_encoder_tokenizer,
tokenization_kwargs,
mock_model_no_score_template,
):
"""Test fallback path when ChatTemplateResolutionError
-     and use_pad_token=True."""
+     and use_sep_token=True."""
with (
patch(
"vllm.model_executor.model_loader.get_model_cls",
@@ -250,7 +250,7 @@ def test_fallback_with_pad_token(
),
):
full_prompt, engine_prompt = get_score_prompt(
-     cross_encoder_model_config,  # use_pad_token=True
+     cross_encoder_model_config,  # use_sep_token=True
cross_encoder_tokenizer,
tokenization_kwargs,
"query",
@@ -281,15 +281,15 @@ def test_fallback_with_pad_token(
add_special_tokens=False,
)

- def test_fallback_without_pad_token(
+ def test_fallback_without_sep_token(
self,
llm_reranker_model_config,
cross_encoder_tokenizer,
tokenization_kwargs,
mock_model_no_score_template,
):
"""Test fallback path when ChatTemplateResolutionError
-     and use_pad_token=False."""
+     and use_sep_token=False."""
with (
patch(
"vllm.model_executor.model_loader.get_model_cls",
@@ -301,7 +301,7 @@ def test_fallback_without_pad_token(
),
):
full_prompt, engine_prompt = get_score_prompt(
-     llm_reranker_model_config,  # use_pad_token=False
+     llm_reranker_model_config,  # use_sep_token=False
cross_encoder_tokenizer,
tokenization_kwargs,
"query",
16 changes: 12 additions & 4 deletions vllm/config/model.py
@@ -1434,10 +1434,18 @@ def matryoshka_dimensions(self):
return getattr(self.hf_config, "matryoshka_dimensions", None)

@property
- def use_pad_token(self) -> bool:
-     # cross_encoder models defaults to using pad_token.
-     # `llm as reranker` models defaults to not using pad_token.
-     return getattr(self.hf_config, "use_pad_token", True)
+ def use_sep_token(self) -> bool:
+     # cross_encoder models defaults to using separating token.
+     # `llm as reranker` defaults to not using separating token.
+
+     use_pad_token = getattr(self.hf_config, "use_pad_token", None)
+     if use_pad_token is not None:
+         logger.warning_once(
+             "use_pad_token has been deprecated; please use use_sep_token instead."
+         )
+         return use_pad_token
+
+     return getattr(self.hf_config, "use_sep_token", True)

@property
def head_dtype(self) -> torch.dtype:
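For readers skimming the new property: an explicitly set (and now deprecated) hf_config.use_pad_token still takes precedence and triggers a warning, otherwise hf_config.use_sep_token is read with a default of True. A standalone sketch of that precedence, with SimpleNamespace standing in for hf_config and a hypothetical resolve_use_sep_token helper mirroring the diff's logic (illustrative only, not the vLLM implementation):

```python
from types import SimpleNamespace


def resolve_use_sep_token(hf_config) -> bool:
    # Deprecated spelling wins if a config sets it explicitly.
    use_pad_token = getattr(hf_config, "use_pad_token", None)
    if use_pad_token is not None:
        return use_pad_token
    # Otherwise fall back to the new attribute, defaulting to True.
    return getattr(hf_config, "use_sep_token", True)


print(resolve_use_sep_token(SimpleNamespace()))                     # True: cross-encoder default
print(resolve_use_sep_token(SimpleNamespace(use_sep_token=False)))  # False: new attribute
print(resolve_use_sep_token(SimpleNamespace(use_pad_token=False)))  # False: deprecated attribute still honored
```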
6 changes: 3 additions & 3 deletions vllm/entrypoints/score_utils.py
@@ -199,14 +199,14 @@ def default_tokenizer_encode():
full_prompt = _apply_model_score_template(model_config, prompt_1, prompt_2)
prompt_inputs = tokenizer(full_prompt, **tokenization_kwargs)
else:
- if model_config.use_pad_token:
-     # cross_encoder models defaults to using pad_token.
+ if model_config.use_sep_token:
+     # cross_encoder models defaults to using separating token.
prompt_inputs = tokenizer(
text=prompt_1, text_pair=prompt_2, **tokenization_kwargs
)
full_prompt = tokenizer.decode(prompt_inputs["input_ids"])
else:
-     # `llm as reranker` models defaults to not using pad_token.
+     # `llm as reranker` defaults to not using separating token.
full_prompt = prompt_1 + prompt_2
prompt_inputs = tokenizer(text=full_prompt, **tokenization_kwargs)
return full_prompt, prompt_inputs
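The two branches build the prompt differently: with use_sep_token=True the query and document are encoded as a text/text_pair pair so the tokenizer inserts its separator tokens, while with use_sep_token=False the strings are simply concatenated. A hedged illustration with a Hugging Face tokenizer (the checkpoint name is only an example; any BERT-style cross-encoder tokenizer shows the same difference):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("cross-encoder/ms-marco-MiniLM-L-6-v2")  # example checkpoint
query = "what is vllm?"
doc = "vLLM is a high-throughput inference engine."

# Pair encoding (use_sep_token=True): the tokenizer places a separator
# between query and document, roughly "[CLS] query [SEP] document [SEP]".
pair = tok(text=query, text_pair=doc)
print(tok.decode(pair["input_ids"]))

# Plain concatenation (use_sep_token=False): no separator between the two,
# roughly "[CLS] querydocument [SEP]".
concat = tok(text=query + doc)
print(tok.decode(concat["input_ids"]))
```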
6 changes: 3 additions & 3 deletions vllm/model_executor/models/adapters.py
@@ -382,9 +382,9 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None:
else:
text_config.num_labels = len(tokens)

-     # `llm as reranker` defaults to not using pad_token
-     use_pad_token = getattr(text_config, "use_pad_token", False)
-     text_config.use_pad_token = use_pad_token
+     # `llm as reranker` defaults to not using separating token.
+     use_sep_token = getattr(text_config, "use_sep_token", False)
+     text_config.use_sep_token = use_sep_token


def load_weights_using_from_2_way_softmax(