Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f05622c
fix: Anthropic streaming reasoning parser not detecting </think> boun…
MatejKosec Mar 14, 2026
dd997b3
fix: remove redundant reasoning parser from Anthropic handler
MatejKosec Mar 16, 2026
edaa39d
fix: correct ModelInput::Text comment in Anthropic handler
MatejKosec Mar 16, 2026
6d87d30
fix: revert reasoning_content skip logic in preprocessor
MatejKosec Mar 16, 2026
c5abd16
fix: strip reserved keys from chat_template_kwargs before apply_chat_…
MatejKosec Mar 17, 2026
8a9ac11
fix: address review comments — drop publisher.py, unify comment style
MatejKosec Mar 20, 2026
64661db
fix: inject reasoning_content into prompt across turns
MatejKosec Mar 20, 2026
8cd5f04
fix: collapse nested if for clippy::collapsible_if
MatejKosec Mar 20, 2026
9d9ab91
fix: remove enable_thinking gate from reasoning_content injection
MatejKosec Mar 20, 2026
5f87168
fix: default to injecting reasoning_content, skip only when thinking …
MatejKosec Mar 20, 2026
d1b9251
fix: inject reasoning_content unconditionally + add render test
MatejKosec Mar 20, 2026
cbb38c6
fix: unconditional reasoning injection + agentic roundtrip tests
MatejKosec Mar 21, 2026
785356b
fix: handle multimodal content arrays in reasoning injection
MatejKosec Mar 21, 2026
d766817
fix: formatting — collapse assignment, remove extra blank line
MatejKosec Mar 21, 2026
45b39aa
fix: inject reasoning_content in Python path for ModelInput::Text
MatejKosec Mar 21, 2026
e317f23
test: add unit tests for Python _inject_reasoning_content
MatejKosec Mar 21, 2026
bac9e62
style: format test file with black + ruff
MatejKosec Mar 21, 2026
9badfe8
fix: skip reasoning injection when template handles it natively
MatejKosec Mar 21, 2026
6718634
fix: set truncate_history_thinking=false for reasoning models
MatejKosec Mar 21, 2026
6df5d2e
fix: source() returns &str not Option, call contains() directly
MatejKosec Mar 21, 2026
299d0ec
fix: has_tools check — use map_or instead of and_then + is_some_and
MatejKosec Mar 21, 2026
e2c6186
Revert "fix: has_tools check — use map_or instead of and_then + is_so…
MatejKosec Mar 21, 2026
e584c3d
docs: add reference to NVIDIA SWE training config for truncate_histor…
MatejKosec Mar 21, 2026
9d856e5
fix: restore publisher.py to main after rebase conflict
MatejKosec Mar 29, 2026
a09d91f
style: fix cargo fmt and pre-commit lint
MatejKosec Mar 30, 2026
1e5e3f4
fix: restore add_filter closure brace clobbered by sed
MatejKosec Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 67 additions & 1 deletion components/src/dynamo/common/utils/input_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,48 @@
from typing import Any, Optional


def _inject_reasoning_content(messages: list) -> None:
"""Inject reasoning_content as <think> blocks into content.

Chat templates only reference message["content"] — they don't see
reasoning_content. This converts it back to <think> blocks so the
model sees its own prior chain-of-thought across turns.
"""
for msg in messages:
if msg.get("role") != "assistant":
continue
reasoning = msg.get("reasoning_content")
if not reasoning:
continue

# Build <think> wrapped text
if isinstance(reasoning, str):
think_text = f"<think>{reasoning}</think>" if reasoning else ""
elif isinstance(reasoning, list):
# Segments variant: wrap each non-empty segment
parts = [f"<think>{seg}</think>" for seg in reasoning if seg]
think_text = "".join(parts)
else:
continue

if not think_text:
continue

# Prepend to content
existing = msg.get("content")
if isinstance(existing, str):
msg["content"] = think_text + existing
elif isinstance(existing, list):
# Multimodal content array — prepend as text part
msg["content"] = [{"type": "text", "text": think_text}] + existing
else:
# null or absent
msg["content"] = think_text

# Remove so template doesn't see both
msg.pop("reasoning_content", None)


class InputParamManager:
def __init__(self, tokenizer: Any) -> None:
self.tokenizer = tokenizer
Expand All @@ -18,8 +60,32 @@ def get_input_param(self, request: dict, use_tokenizer: bool) -> Optional[Any]:
raise ValueError("Tokenizer is not available")

if "messages" in request:
# Forward chat_template_args / chat_template_kwargs to the
# template so model-specific variables (e.g. enable_thinking)
# are available during rendering.
extra_kwargs = {}
if "chat_template_kwargs" in request:
extra_kwargs.update(request["chat_template_kwargs"])
if "chat_template_args" in request:
extra_kwargs.update(request["chat_template_args"])
# Strip keys that are already set explicitly to avoid
# TypeError: got multiple values for keyword argument.
for reserved in ("tokenize", "add_generation_prompt"):
extra_kwargs.pop(reserved, None)

# Inject reasoning_content as <think> blocks into content,
# but only if the template doesn't handle it natively.
# Templates like Nemotron and Qwen3 reference reasoning_content
# directly — injecting would produce duplicate <think> blocks.
chat_template_src = getattr(self.tokenizer, "chat_template", "") or ""
if "reasoning_content" not in chat_template_src:
_inject_reasoning_content(request["messages"])

return self.tokenizer.apply_chat_template(
request["messages"], tokenize=False, add_generation_prompt=True
request["messages"],
tokenize=False,
add_generation_prompt=True,
**extra_kwargs,
)
elif "prompt" in request:
return self.tokenizer.encode(request["prompt"])
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""
Unit tests for _inject_reasoning_content in input_params.py.

Verifies that reasoning_content from prior assistant turns is converted
to <think> blocks in the content field before chat template rendering.
"""

import copy

from dynamo.common.utils.input_params import _inject_reasoning_content


class TestInjectReasoningContent:
    """Test suite for _inject_reasoning_content.

    Each test builds a message list, runs the in-place injection, and then
    checks both the resulting ``content`` and whether ``reasoning_content``
    was removed or left alone.
    """

    def test_text_variant_prepends_to_content(self):
        """Text reasoning_content is wrapped in <think> and prepended."""
        messages = [
            {
                "role": "assistant",
                "content": "The answer is 12.",
                "reasoning_content": "sqrt(144) = 12",
            },
        ]
        _inject_reasoning_content(messages)

        assert (
            messages[0]["content"] == "<think>sqrt(144) = 12</think>The answer is 12."
        )
        assert "reasoning_content" not in messages[0]

    def test_segments_variant_wraps_each_segment(self):
        """Segments are individually wrapped in <think> blocks."""
        messages = [
            {
                "role": "assistant",
                "content": "Done.",
                "reasoning_content": ["first thought", "second thought", ""],
            },
        ]
        _inject_reasoning_content(messages)

        content = messages[0]["content"]
        # Pin the exact output so segment order and adjacency are verified,
        # not just the presence of each piece.
        assert content == (
            "<think>first thought</think><think>second thought</think>Done."
        )
        assert "<think></think>" not in content  # empty segment skipped
        assert "reasoning_content" not in messages[0]

    def test_null_content_creates_from_reasoning(self):
        """When content is null/None, reasoning becomes the content."""
        messages = [
            {"role": "assistant", "content": None, "reasoning_content": "Thinking..."},
        ]
        _inject_reasoning_content(messages)

        assert messages[0]["content"] == "<think>Thinking...</think>"
        assert "reasoning_content" not in messages[0]

    def test_absent_content_creates_from_reasoning(self):
        """When content key is absent, reasoning becomes the content."""
        messages = [
            {"role": "assistant", "reasoning_content": "Thinking..."},
        ]
        _inject_reasoning_content(messages)

        assert messages[0]["content"] == "<think>Thinking...</think>"
        assert "reasoning_content" not in messages[0]

    def test_multimodal_content_prepends_text_part(self):
        """Array content gets a text part prepended, not replaced."""
        messages = [
            {
                "role": "assistant",
                "content": [{"type": "text", "text": "Here is the image."}],
                "reasoning_content": "Analyzing the image...",
            },
        ]
        _inject_reasoning_content(messages)

        content = messages[0]["content"]
        assert isinstance(content, list)
        assert len(content) == 2
        assert content[0] == {
            "type": "text",
            "text": "<think>Analyzing the image...</think>",
        }
        assert content[1] == {"type": "text", "text": "Here is the image."}
        assert "reasoning_content" not in messages[0]

    def test_skips_non_assistant_messages(self):
        """User and tool messages are not modified."""
        messages = [
            {
                "role": "user",
                "content": "hello",
                "reasoning_content": "should not touch",
            },
            {
                "role": "tool",
                "content": "result",
                "reasoning_content": "should not touch",
            },
        ]
        original = copy.deepcopy(messages)
        _inject_reasoning_content(messages)

        assert messages == original

    def test_skips_empty_reasoning(self):
        """Empty string reasoning_content is skipped."""
        messages = [
            {"role": "assistant", "content": "Answer.", "reasoning_content": ""},
        ]
        _inject_reasoning_content(messages)

        assert messages[0]["content"] == "Answer."
        # The falsy check skips the message before the key is popped, so the
        # empty reasoning_content must still be present.
        assert messages[0]["reasoning_content"] == ""

    def test_agentic_multi_turn_tool_call_flow(self):
        """Full agentic flow: reason → tool_call → tool_result → reason → answer."""
        messages = [
            {"role": "user", "content": "What is sqrt(144) + sqrt(256)?"},
            {
                "role": "assistant",
                "content": None,
                "reasoning_content": "I need to compute sqrt(144) first.",
                "tool_calls": [
                    {
                        "id": "call_0",
                        "type": "function",
                        "function": {
                            "name": "calc",
                            "arguments": '{"expr": "sqrt(144)"}',
                        },
                    },
                ],
            },
            {"role": "tool", "tool_call_id": "call_0", "content": "12"},
            {
                "role": "assistant",
                "content": "The answer is 28.",
                "reasoning_content": "Got 12. sqrt(256) = 16. Sum = 28.",
            },
            {"role": "user", "content": "Thanks!"},
        ]
        _inject_reasoning_content(messages)

        # First assistant turn: reasoning injected, null content → reasoning only
        assert (
            messages[1]["content"]
            == "<think>I need to compute sqrt(144) first.</think>"
        )
        assert "reasoning_content" not in messages[1]
        assert "tool_calls" in messages[1]  # tool_calls untouched

        # Tool message untouched
        assert messages[2]["content"] == "12"

        # Second assistant turn: reasoning prepended to content
        assert (
            messages[3]["content"]
            == "<think>Got 12. sqrt(256) = 16. Sum = 28.</think>The answer is 28."
        )
        assert "reasoning_content" not in messages[3]

        # User messages untouched
        assert messages[0]["content"] == "What is sqrt(144) + sqrt(256)?"
        assert messages[4]["content"] == "Thanks!"


class TestInputParamManagerReasoningInjection:
    """Check that InputParamManager inspects the chat template before injecting."""

    @staticmethod
    def _messages_passed_to_template(chat_template):
        """Run get_input_param against a mocked tokenizer.

        Returns the message list that was handed to the mocked
        ``apply_chat_template`` as its first positional argument.
        """
        from unittest.mock import MagicMock

        from dynamo.common.utils.input_params import InputParamManager

        fake_tokenizer = MagicMock()
        fake_tokenizer.chat_template = chat_template
        fake_tokenizer.apply_chat_template = MagicMock(return_value="rendered")

        assistant_turn = {
            "role": "assistant",
            "content": "Hi.",
            "reasoning_content": "thinking...",
        }
        user_turn = {"role": "user", "content": "Bye"}

        manager = InputParamManager(fake_tokenizer)
        manager.get_input_param(
            {"messages": [assistant_turn, user_turn]}, use_tokenizer=True
        )

        return fake_tokenizer.apply_chat_template.call_args.args[0]

    def test_injects_when_template_ignores_reasoning(self):
        """Templates without reasoning_content get injection."""
        seen = self._messages_passed_to_template(
            "{% for m in messages %}{{ m.role }}: {{ m.content }}{% endfor %}"
        )

        # Injection happened: the field is gone and content carries <think>.
        first = seen[0]
        assert "reasoning_content" not in first
        assert first["content"].startswith("<think>thinking...</think>")

    def test_skips_injection_when_template_handles_reasoning(self):
        """Templates with reasoning_content are left alone."""
        seen = self._messages_passed_to_template(
            "{% for m in messages %}"
            "{% if m.reasoning_content %}<think>{{ m.reasoning_content }}</think>{% endif %}"
            "{{ m.role }}: {{ m.content }}{% endfor %}"
        )

        # Injection was skipped: the field survives and content is unchanged.
        first = seen[0]
        assert first["reasoning_content"] == "thinking..."
        assert first["content"] == "Hi."
Loading
Loading