Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
360 changes: 360 additions & 0 deletions tests/reasoning/test_hyperclovax_reasoning_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,360 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer

from tests.reasoning.utils import (
StreamingReasoningReconstructor,
run_reasoning_extraction,
run_reasoning_extraction_streaming,
)
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.reasoning import ReasoningParser, ReasoningParserManager

# Name under which the parser class is looked up through
# ReasoningParserManager.get_reasoning_parser in this module.
PARSER_NAME = "hyperclovax"

# Identifier handed to AutoTokenizer.from_pretrained by the tokenizer fixture.
TOKENIZER_NAME = "naver-hyperclovax/HyperCLOVAX-SEED-Think-32B"


# Text fragments used to assemble the simulated model outputs in this module.
# THINK_START opens the outputs of the cases that contain a thinking section.
THINK_START = "/think\n"
# THINK_END_BASE sits between the thinking text and whatever follows it.
THINK_END_BASE = "<|im_end|>\n<|im_start|>assistant"
# FUNCTION_CALL_ROLE precedes the tool-call payload in the tool-call cases.
FUNCTION_CALL_ROLE = " -> tool/function_call\n"


def _tool_payload(name: str = "search", args: str = '{"query":"weather"}') -> str:
return f'[{{"name":"{name}","arguments":{args}}}]'


@pytest.fixture(scope="module")
def hyperclovax_tokenizer():
    """Load the tokenizer named by TOKENIZER_NAME once per test module."""
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
    return tokenizer


@pytest.fixture
def parser(hyperclovax_tokenizer) -> ReasoningParser:
    """Build a new parser instance for each test from the shared tokenizer."""
    parser_cls = ReasoningParserManager.get_reasoning_parser(PARSER_NAME)
    return parser_cls(hyperclovax_tokenizer)


def test_hyperclovax_reasoning_parser_creation(hyperclovax_tokenizer):
    """The class registered under PARSER_NAME instantiates as a ReasoningParser."""
    created = ReasoningParserManager.get_reasoning_parser(PARSER_NAME)(
        hyperclovax_tokenizer
    )
    assert isinstance(created, ReasoningParser)


@pytest.fixture
def request_auto() -> ChatCompletionRequest:
    """Provide a minimal request with no messages and ``tool_choice=None``."""
    request = ChatCompletionRequest(
        messages=[],
        model="test-model",
        tool_choice=None,
    )
    return request


# Each case maps a simulated model "output" to the expected "reasoning" and
# "content"; an optional "tool_choice" key is read by the non-streaming test
# when it builds the request.
REASONING_WITH_CONTENT = {
    "output": (
        f"{THINK_START}This is reasoning.\n"
        f"{THINK_END_BASE}\nThis is the answer."
    ),
    "reasoning": "This is reasoning.\n",
    "content": "\nThis is the answer.",
}

REASONING_ONLY = {
    "output": f"{THINK_START}Still thinking...",
    "reasoning": "Still thinking...",
    "content": None,
}

EMPTY_THINKING_NONSTREAM = {
    "output": f"{THINK_START}{THINK_END_BASE}\nAnswer.",
    "reasoning": None,
    "content": "\nAnswer.",
}

NO_THINKING_NONSTREAM = {
    "output": "\nDirect answer.",
    "reasoning": None,
    "content": "Direct answer.",
    "tool_choice": "auto",
}

TOOL_CALL_AFTER_THINK_NONSTREAM = {
    "output": (
        f"{THINK_START}Let me check.\n"
        f"{THINK_END_BASE}{FUNCTION_CALL_ROLE}{_tool_payload()}"
    ),
    "reasoning": "Let me check.\n",
    "content": f"{FUNCTION_CALL_ROLE}{_tool_payload()}",
}

DIRECT_TOOL_CALL_NONSTREAM = {
    "output": f"{FUNCTION_CALL_ROLE}{_tool_payload()}",
    "reasoning": None,
    "content": _tool_payload(),
    "tool_choice": "required",
}

MULTILINE_REASONING = {
    "output": (
        f"{THINK_START}Line one.\nLine two.\n"
        f"{THINK_END_BASE}\nFinal answer."
    ),
    "reasoning": "Line one.\nLine two.\n",
    "content": "\nFinal answer.",
}

# Parametrization for the non-streaming extraction test, in (id, case) order.
NON_STREAMING_TEST_CASES = [
    pytest.param(case, id=case_id)
    for case_id, case in (
        ("reasoning_with_content", REASONING_WITH_CONTENT),
        ("reasoning_only", REASONING_ONLY),
        ("empty_thinking", EMPTY_THINKING_NONSTREAM),
        ("no_thinking", NO_THINKING_NONSTREAM),
        ("tool_call_after_think", TOOL_CALL_AFTER_THINK_NONSTREAM),
        ("direct_tool_call", DIRECT_TOOL_CALL_NONSTREAM),
        ("multiline_reasoning", MULTILINE_REASONING),
    )
]


# Streaming-specific expectations: same "output"/"reasoning"/"content" layout
# as the non-streaming cases, with the values expected from the streaming path.
EMPTY_THINKING_STREAM = {
    "output": f"{THINK_START}{THINK_END_BASE}\nAnswer.",
    "reasoning": "",
    "content": "\nAnswer.",
}

NO_THINKING_STREAM = {
    "output": "\nDirect answer.",
    "reasoning": None,
    "content": "\nDirect answer.",
}

TOOL_CALL_AFTER_THINK_STREAM = {
    "output": (
        f"{THINK_START}Let me check.\n"
        f"{THINK_END_BASE}{FUNCTION_CALL_ROLE}{_tool_payload()}"
    ),
    "reasoning": "Let me check.\n",
    "content": _tool_payload(),
}

# Parametrization for the streaming extraction test, in (id, case) order.
STREAMING_TEST_CASES = [
    pytest.param(case, id=case_id)
    for case_id, case in (
        ("reasoning_with_content", REASONING_WITH_CONTENT),
        ("reasoning_only", REASONING_ONLY),
        ("empty_thinking", EMPTY_THINKING_STREAM),
        ("no_thinking", NO_THINKING_STREAM),
        ("tool_call_after_think", TOOL_CALL_AFTER_THINK_STREAM),
        ("multiline_reasoning", MULTILINE_REASONING),
    )
]


def _make_request(tool_choice=None) -> ChatCompletionRequest:
    """Build a ChatCompletionRequest for the given ``tool_choice``.

    When ``tool_choice`` is ``None`` or ``"none"`` the request carries no
    tools and no ``tool_choice``. Otherwise a single ``search`` function tool
    is attached and ``tool_choice`` is passed through unchanged.
    """
    request_kwargs: dict = {"messages": [], "model": "test-model"}

    if tool_choice not in (None, "none"):
        search_tool = {
            "type": "function",
            "function": {
                "name": "search",
                "description": "test tool",
                "parameters": {"type": "object", "properties": {}},
            },
        }
        request_kwargs["tools"] = [search_tool]
        request_kwargs["tool_choice"] = tool_choice

    return ChatCompletionRequest(**request_kwargs)


@pytest.mark.parametrize("param_dict", NON_STREAMING_TEST_CASES)
def test_extract_reasoning_nonstreaming(
    param_dict: dict,
    parser: ReasoningParser,
):
    """Check reasoning/content extraction for each non-streaming case."""
    # Cases without an explicit "tool_choice" get a request without tools.
    request = _make_request(tool_choice=param_dict.get("tool_choice", "none"))

    # Re-tokenize the output and turn every token back into its own string.
    tokenizer = parser.model_tokenizer
    output_tokens = [
        tokenizer.convert_tokens_to_string([tok])
        for tok in tokenizer.tokenize(param_dict["output"])
    ]

    extracted = run_reasoning_extraction(
        parser, output_tokens, request=request, streaming=False
    )
    reasoning, content = extracted

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]


@pytest.mark.parametrize("param_dict", STREAMING_TEST_CASES)
def test_extract_reasoning_streaming(
    param_dict: dict,
    hyperclovax_tokenizer,
):
    """Check reasoning/content extraction for each streaming case."""
    # A parser is constructed here rather than taken from the fixture.
    parser_cls = ReasoningParserManager.get_reasoning_parser(PARSER_NAME)
    fresh_parser = parser_cls(hyperclovax_tokenizer)

    # Re-tokenize the output and turn every token back into its own string.
    raw_tokens = hyperclovax_tokenizer.tokenize(param_dict["output"])
    output_tokens = [
        hyperclovax_tokenizer.convert_tokens_to_string([tok]) for tok in raw_tokens
    ]

    extracted = run_reasoning_extraction(fresh_parser, output_tokens, streaming=True)
    reasoning, content = extracted

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]


def test_is_reasoning_end_true_with_newline_variant(parser: ReasoningParser):
    """An end marker followed by a newline is reported as reasoning end."""
    text = THINK_START + "hello" + THINK_END_BASE + "\n"
    token_ids = parser.model_tokenizer.encode(text)
    assert parser.is_reasoning_end(token_ids) is True


def test_is_reasoning_end_true_with_content_after_end(parser: ReasoningParser):
    """An end marker followed by the function-call role is reported as end."""
    text = THINK_START + "hello" + THINK_END_BASE + FUNCTION_CALL_ROLE
    token_ids = parser.model_tokenizer.encode(text)
    assert parser.is_reasoning_end(token_ids) is True


def test_is_reasoning_end_false_start_after_end(parser: ReasoningParser):
    """A start marker appearing after the end marker is not a reasoning end."""
    text = THINK_END_BASE + "\n" + THINK_START + "more"
    token_ids = parser.model_tokenizer.encode(text)
    assert parser.is_reasoning_end(token_ids) is False


def test_is_reasoning_end_false_no_end_token(parser: ReasoningParser):
    """Plain text with no end marker is not reported as reasoning end."""
    tokenizer = parser.model_tokenizer
    regular_ids = tokenizer.encode("hello world, still reasoning")
    assert parser.is_reasoning_end(regular_ids) is False


def test_is_reasoning_end_true_single_end_token(parser: ReasoningParser):
    """A sequence holding only ``parser.end_token_id`` is a reasoning end."""
    only_end_token = [parser.end_token_id]
    assert parser.is_reasoning_end(only_end_token) is True


def test_is_reasoning_end_streaming_true_on_end_token_delta(parser: ReasoningParser):
    """The streaming check is True when both arguments hold the end token id."""
    end_id = parser.end_token_id
    ended = parser.is_reasoning_end_streaming([end_id], [end_id])
    assert ended is True


def test_is_reasoning_end_streaming_false_without_end_token_delta(
    parser: ReasoningParser,
):
    """The streaming check is False when the second argument lacks the end id."""
    end_id = parser.end_token_id
    # end_id + 1 serves as an arbitrary id that differs from the end token.
    ended = parser.is_reasoning_end_streaming([end_id], [end_id + 1])
    assert ended is False


def test_is_reasoning_end_false_empty_sequence(parser: ReasoningParser):
    """An empty id sequence is not reported as reasoning end."""
    no_ids: list = []
    assert parser.is_reasoning_end(no_ids) is False


def test_extract_content_ids_after_end_token(parser: ReasoningParser):
    """Ids returned by extract_content_ids decode to text containing "hello"."""
    tokenizer = parser.model_tokenizer
    all_ids = tokenizer.encode(THINK_START + "abc" + THINK_END_BASE + "hello")

    content_ids = parser.extract_content_ids(all_ids)
    decoded = tokenizer.decode(content_ids, skip_special_tokens=False)

    assert "hello" in decoded


def test_extract_content_ids_no_end_token(parser: ReasoningParser):
    """Without an end marker in the input, no content ids are returned."""
    still_reasoning_ids = parser.model_tokenizer.encode("still thinking")
    content_ids = parser.extract_content_ids(still_reasoning_ids)
    assert content_ids == []


# Each entry is (deltas, expected_reasoning, expected_content): the deltas are
# streamed in order and the reconstructed totals are compared at the end.
MULTI_TOKEN_DELTA_CASES = [
    pytest.param(
        [f"{THINK_START}reasoning", f"{THINK_END_BASE}content"],
        "reasoning",
        "content",
        id="end_tag_and_content_in_one_delta",
    ),
    pytest.param(
        [f"{THINK_START}start of thinking", " more", f"{THINK_END_BASE}ok"],
        "start of thinking more",
        "ok",
        id="start_marker_with_reasoning",
    ),
    pytest.param(
        [
            f"{THINK_START}reasoning",
            "<|im_end|>",
            "\n<|im_start|>assistant",
            "result",
        ],
        "reasoning",
        "result",
        id="end_tag_split_across_deltas",
    ),
    pytest.param(
        ["\ndirect content"],
        None,
        "\ndirect content",
        id="no_thinking_single_delta",
    ),
    pytest.param(
        [
            f"{THINK_START}think",
            f"{THINK_END_BASE}{FUNCTION_CALL_ROLE}{_tool_payload()}",
        ],
        "think",
        _tool_payload(),
        id="tool_call_after_reasoning",
    ),
]


@pytest.mark.parametrize(
    "deltas, expected_reasoning, expected_content",
    MULTI_TOKEN_DELTA_CASES,
)
def test_streaming_multi_token_deltas(
    deltas: list[str],
    expected_reasoning: str | None,
    expected_content: str | None,
    hyperclovax_tokenizer,
):
    """Stream multi-token text deltas and compare the reconstructed totals."""
    parser_cls = ReasoningParserManager.get_reasoning_parser(PARSER_NAME)
    fresh_parser = parser_cls(hyperclovax_tokenizer)

    reconstructor: StreamingReasoningReconstructor = (
        run_reasoning_extraction_streaming(fresh_parser, deltas)
    )

    assert reconstructor.reasoning == expected_reasoning
    # Falsy content (e.g. an empty string) is compared as None.
    actual_content = reconstructor.other_content or None
    assert actual_content == expected_content


def test_force_reasoning_treats_all_as_reasoning(parser: ReasoningParser):
    """With ``force_reasoning`` set, marker-free output is all reasoning."""
    model_output = "No think marker but forced."
    request = ChatCompletionRequest(
        messages=[],
        model="test-model",
        chat_template_kwargs={"force_reasoning": True},
    )

    reasoning, content = parser.extract_reasoning(model_output, request)

    assert reasoning == "No think marker but forced."
    assert content is None


def test_skip_reasoning_returns_all_as_content(parser: ReasoningParser):
    """With ``skip_reasoning`` set, the whole output comes back as content."""
    model_output = THINK_START + "This should be content."
    request = ChatCompletionRequest(
        messages=[],
        model="test-model",
        chat_template_kwargs={"skip_reasoning": True},
    )

    reasoning, content = parser.extract_reasoning(model_output, request)

    assert reasoning is None
    assert content == THINK_START + "This should be content."


def test_force_reasoning_takes_priority_over_skip(parser: ReasoningParser):
    """When both flags are set, the output is returned as reasoning."""
    template_kwargs = {"force_reasoning": True, "skip_reasoning": True}
    request = ChatCompletionRequest(
        messages=[],
        model="test-model",
        chat_template_kwargs=template_kwargs,
    )

    reasoning, content = parser.extract_reasoning("some output", request)

    assert reasoning == "some output"
    assert content is None
Loading
Loading