From c3e516397d9a39e3eb53a81456755f2e929810c7 Mon Sep 17 00:00:00 2001 From: Asad Shahid Date: Mon, 23 Mar 2026 10:11:06 +0000 Subject: [PATCH 1/6] fix: tool_choice=required bypasses format-specific parsers (#6821) When tool_choice is 'required' or 'named', the jail builder was unconditionally set to Immediate JSON mode, which can only parse raw JSON output. This meant format-specific parsers like qwen3_coder (XML), pythonic, harmony, etc. were never invoked, causing raw markup to leak as output_text instead of being parsed into structured function_call items. Root cause: In apply_tool_calling_jail(), the match on tool_choice set tool_choice_required()/tool_choice_named() on the builder WITHOUT also setting the tool_call_parser. The Immediate jail mode only understands JSON ({...} or [{...}]), so any model that emits non-JSON tool call format (XML ..., pythonic, DSML, etc.) was silently broken under tool_choice=required. Fix: When a tool_call_parser is configured, always use marker-based jail mode regardless of tool_choice value. This ensures the format-specific parser is invoked to detect and extract tool calls. Immediate JSON mode is preserved as a fallback for required/named when NO parser is configured (the model is expected to emit raw JSON in that case). Affected parsers: qwen3_coder, nemotron_nano, pythonic, harmony, dsml, glm47, kimi_k2, and any future non-JSON parser. Testing: - Added regression test: tool_choice_required_with_qwen3_coder_parser - Existing tool_choice tests for JSON-only (no parser) paths unchanged Fixes: #6821 Signed-off-by: Asad Shahid --- lib/llm/src/preprocessor.rs | 25 ++++++++++-- lib/llm/tests/test_jail.rs | 81 +++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs index 4413e472534..a4c2adb6686 100644 --- a/lib/llm/src/preprocessor.rs +++ b/lib/llm/src/preprocessor.rs @@ -1114,14 +1114,31 @@ impl OpenAIPreprocessor { } // Configure jail based on tool_choice + // + // When a tool_call_parser is configured, always use marker-based mode + // so that format-specific parsers (e.g. qwen3_coder XML) are invoked. + // Immediate JSON mode is only a fallback for required/named when no + // parser exists (the model is expected to emit raw JSON in that case). match tool_choice { Some(ChatCompletionToolChoiceOption::Named(named)) => { - // Immediate jail mode for named tool choice - builder = builder.tool_choice_named(named.function.name.clone()); + if let Some(parser) = tool_call_parser { + // Parser-aware path: use marker-based jail so the parser + // handles format-specific output (XML, pythonic, etc.). + builder = builder.tool_call_parser(parser); + } else { + // No parser: fall back to Immediate JSON jail mode. + builder = builder.tool_choice_named(named.function.name.clone()); + } } Some(ChatCompletionToolChoiceOption::Required) => { - // Immediate jail mode for required tool choice - builder = builder.tool_choice_required(); + if let Some(parser) = tool_call_parser { + // Parser-aware path: use marker-based jail so the parser + // handles format-specific output (XML, pythonic, etc.). + builder = builder.tool_call_parser(parser); + } else { + // No parser: fall back to Immediate JSON jail mode. + builder = builder.tool_choice_required(); + } } Some(ChatCompletionToolChoiceOption::Auto) | Some(ChatCompletionToolChoiceOption::None) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index e9dea8b36b7..a6169ffb0d6 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -3080,4 +3080,85 @@ mod parallel_jail_tests { "Should have no tool calls for empty array" ); } + + /// Regression test for #6821: tool_choice=required with qwen3_coder parser. + /// + /// When tool_choice=required AND a tool_call_parser (e.g. qwen3_coder) is + /// configured, the jail must use marker-based mode so the parser handles the + /// XML output. Previously this fell through to Immediate JSON mode which + /// could not parse qwen3_coder XML, causing raw XML to leak as content. + #[tokio::test] + async fn test_tool_choice_required_with_qwen3_coder_parser() { + // Simulate qwen3_coder XML output for a single tool call + let xml_output = r#" + + +San Francisco + + +fahrenheit + + +"#; + + // Build jail the same way apply_tool_calling_jail would when a + // parser is configured together with tool_choice=required. + // After the fix, this uses marker-based mode (not Immediate). + let jail = JailedStream::builder() + .tool_call_parser("qwen3_coder".to_string()) + .build(); + + let input_chunks = vec![test_utils::create_mock_response_chunk( + xml_output.to_string(), + 0, + )]; + + let input_stream = stream::iter(input_chunks); + let results: Vec<_> = jail.apply_with_finish_reason(input_stream).collect().await; + + // Should have parsed a tool call, not leaked raw XML as content + let tool_call_count: usize = results + .iter() + .map(|r| { + r.data.as_ref().map_or(0, |d| { + d.choices + .iter() + .map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len())) + .sum::() + }) + }) + .sum(); + + assert!( + tool_call_count >= 1, + "tool_choice=required with qwen3_coder should produce at least one tool call, got {}", + tool_call_count + ); + + // Verify the tool call was parsed correctly + for r in &results { + if let Some(data) = &r.data { + for choice in &data.choices { + if let Some(tool_calls) = &choice.delta.tool_calls { + for tc in tool_calls { + assert_eq!( + tc.function.as_ref().unwrap().name.as_deref(), + Some("get_weather"), + "Tool call name should be 'get_weather'" + ); + } + } + // Content should be empty, not raw XML + if let Some(content) = &choice.delta.content { + let text = test_utils::extract_text(content); + assert!( + !text.contains(""), + "Raw XML should not leak as content, got: {}", + text + ); + } + } + } + } + } } From acac75b24d018c6a5968392b3dbf5cbb12582555 Mon Sep 17 00:00:00 2001 From: Asad Shahid Date: Tue, 24 Mar 2026 23:44:49 +0000 Subject: [PATCH 2/6] fix: enforce named tool filter when tool_call_parser is set (#7589) When tool_choice=named and a tool_call_parser is configured, the previous code set builder.tool_call_parser(parser) but never called tool_choice_named(), so named-tool enforcement was silently dropped. Fix: - Add named_tool_name: Option to JailedStream and JailedStreamBuilder - Add JailedStreamBuilder::named_tool_filter(tool_name) method - In preprocessor.rs Named+parser branch: call both .tool_call_parser(parser) and .named_tool_filter(named.function.name.clone()) - In create_tool_call_choice MarkerBased path: filter parsed ToolCalls against named_tool_name before emission; if all calls are for the wrong tool, return the accumulated content as a content choice instead Acceptance criteria verified: - tool_choice=named + parser uses marker-based parsing (unchanged) - Only the named tool is accepted; other tool names are rejected/filtered - tool_choice=auto and tool_choice=required paths are unchanged Tests added: - test_named_tool_with_parser_correct_tool_passes: hermes output for the correct tool passes through with the right name - test_named_tool_with_parser_wrong_tool_is_filtered: hermes output for a different tool is filtered out before emission Signed-off-by: Asad Shahid --- lib/llm/src/preprocessor.rs | 6 +- .../protocols/openai/chat_completions/jail.rs | 47 ++++++ lib/llm/tests/tool_choice.rs | 144 ++++++++++++++++++ 3 files changed, 196 insertions(+), 1 deletion(-) diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs index a4c2adb6686..83a487cc976 100644 --- a/lib/llm/src/preprocessor.rs +++ b/lib/llm/src/preprocessor.rs @@ -1124,7 +1124,11 @@ impl OpenAIPreprocessor { if let Some(parser) = tool_call_parser { // Parser-aware path: use marker-based jail so the parser // handles format-specific output (XML, pythonic, etc.). - builder = builder.tool_call_parser(parser); + // Also install a named-tool filter so that if the model emits + // the wrong tool, the parsed call is rejected before emission. + builder = builder + .tool_call_parser(parser) + .named_tool_filter(named.function.name.clone()); } else { // No parser: fall back to Immediate JSON jail mode. builder = builder.tool_choice_named(named.function.name.clone()); diff --git a/lib/llm/src/protocols/openai/chat_completions/jail.rs b/lib/llm/src/protocols/openai/chat_completions/jail.rs index e551ba2e2b6..c6bddcac658 100644 --- a/lib/llm/src/protocols/openai/chat_completions/jail.rs +++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs @@ -470,6 +470,9 @@ pub struct JailedStream { jail_start_sequences: Vec, jail_end_sequences: Vec, tool_call_parser: Option, + /// When set, only tool calls with this name are emitted (enforces tool_choice=named + /// when a tool_call_parser is active and the parser-aware MarkerBased path is used). + named_tool_name: Option, tool_definitions: Option>, emission_mode: EmissionMode, marker_matcher: MarkerMatcher, @@ -856,6 +859,37 @@ impl JailedStream { if let Ok((tool_calls, normal_text)) = parse_result && !tool_calls.is_empty() { + // If a named tool filter is set (tool_choice=named + parser path), reject + // tool calls that don't match the required tool name. + let tool_calls = if let Some(ref required_name) = self.named_tool_name { + let filtered: Vec<_> = tool_calls + .into_iter() + .filter(|tc| tc.function.name == *required_name) + .collect(); + if filtered.is_empty() { + tracing::warn!( + required = %required_name, + "tool_choice=named: parser emitted no matching tool calls; dropping jail output" + ); + } + filtered + } else { + tool_calls + }; + + if tool_calls.is_empty() { + // All parsed calls were for the wrong tool — return content choice + return create_choice_stream( + choice_index, + Some(Role::Assistant), + accumulated_content, + None, + base_choice.finish_reason, + base_choice.stop_reason.clone(), + base_choice.logprobs.clone(), + ); + } + // Convert to streaming format let tool_call_chunks: Vec = tool_calls .into_iter() @@ -1070,6 +1104,9 @@ pub struct JailedStreamBuilder { jail_start_sequences: Vec, jail_end_sequences: Vec, tool_call_parser: Option, + /// When set, only tool calls with this name are emitted (enforces tool_choice=named + /// when a tool_call_parser is active and the parser-aware MarkerBased path is used). + named_tool_name: Option, tool_definitions: Option>, emission_mode: EmissionMode, jail_mode: JailMode, @@ -1082,6 +1119,7 @@ impl JailedStreamBuilder { jail_start_sequences: Vec::new(), jail_end_sequences: Vec::new(), tool_call_parser: None, + named_tool_name: None, tool_definitions: None, emission_mode: EmissionMode::default(), jail_mode: JailMode::MarkerBased, @@ -1126,6 +1164,14 @@ impl JailedStreamBuilder { self } + /// Constrain parsed output to a single named tool (for tool_choice=named + parser path). + /// When set, tool calls emitted by the parser that don't match `tool_name` are silently + /// filtered out, enforcing the named-tool contract even when the model emits the wrong tool. + pub fn named_tool_filter(mut self, tool_name: impl Into) -> Self { + self.named_tool_name = Some(tool_name.into()); + self + } + /// Set the tool definitions for runtime validation and parsing pub fn tool_definitions( mut self, @@ -1245,6 +1291,7 @@ impl JailedStreamBuilder { jail_start_sequences: self.jail_start_sequences, jail_end_sequences: self.jail_end_sequences, tool_call_parser: self.tool_call_parser, + named_tool_name: self.named_tool_name, tool_definitions: self.tool_definitions, emission_mode: self.emission_mode, marker_matcher, diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs index 6f069dd90bd..91d8dd02ac8 100644 --- a/lib/llm/tests/tool_choice.rs +++ b/lib/llm/tests/tool_choice.rs @@ -454,3 +454,147 @@ fn test_no_tool_choice_outputs_normal_text() { ); assert!(response.inner.choices[0].delta.tool_calls.is_none()); } + +// --------------------------------------------------------------------------- +// tool_choice=named + tool_call_parser enforcement (CodeRabbit PR #7589) +// --------------------------------------------------------------------------- + +/// Build a raw streaming response chunk with arbitrary text content. +fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse { + use dynamo_async_openai::types::{ + ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role, + }; + #[allow(deprecated)] + dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse { + id: "test-named-parser".to_string(), + choices: vec![ChatChoiceStream { + index: 0, + delta: ChatCompletionStreamResponseDelta { + role: Some(Role::Assistant), + content: Some(ChatCompletionMessageContent::Text(text.to_string())), + tool_calls: None, + function_call: None, + refusal: None, + reasoning_content: None, + }, + finish_reason: if finish { + Some(dynamo_async_openai::types::FinishReason::Stop) + } else { + None + }, + stop_reason: None, + logprobs: None, + }], + created: 1234567890, + model: "test-model".to_string(), + system_fingerprint: None, + object: "chat.completion.chunk".to_string(), + usage: None, + service_tier: None, + nvext: None, + } +} + +/// Apply jail with both a tool_call_parser and a named_tool_filter, returning all chunks. +async fn apply_jail_named_with_parser( + chunks: Vec, + parser: &str, + named_tool: &str, +) -> Vec { + use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream; + use dynamo_runtime::protocols::annotated::Annotated; + use futures::StreamExt; + use futures::stream; + + let input = stream::iter(chunks.into_iter().map(|r| Annotated { + data: Some(r), + id: None, + event: None, + comment: None, + error: None, + })); + + let jail = JailedStream::builder() + .tool_call_parser(parser) + .named_tool_filter(named_tool) + .build(); + + let out = jail.apply_with_finish_reason(input); + tokio::pin!(out); + out.filter_map(|ann| async move { ann.data }).collect().await +} + +/// When tool_choice=named, a tool_call_parser is configured, and the model emits +/// the **correct** tool, the parsed tool call must pass through with the right name. +#[tokio::test] +async fn test_named_tool_with_parser_correct_tool_passes() { + // Hermes format: {"name":"get_weather","arguments":{...}}\n + let hermes_payload = + "\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n"; + + let chunks = vec![ + make_text_chunk(hermes_payload, false), + make_text_chunk("", true), // final empty chunk with finish_reason + ]; + + let responses = apply_jail_named_with_parser(chunks, "hermes", "get_weather").await; + + // Should have at least one response with tool calls + let tool_call_response = responses + .iter() + .find(|r| { + r.choices + .first() + .and_then(|c| c.delta.tool_calls.as_ref()) + .is_some() + }) + .expect("expected a response with tool calls for the correct named tool"); + + let tool_calls = tool_call_response.choices[0] + .delta + .tool_calls + .as_ref() + .unwrap(); + assert_eq!(tool_calls.len(), 1, "expected exactly one tool call"); + assert_eq!( + tool_calls[0].function.as_ref().unwrap().name.as_deref(), + Some("get_weather"), + "tool call name should be get_weather" + ); +} + +/// When tool_choice=named, a tool_call_parser is configured, and the model emits +/// the **wrong** tool, the parsed call must be filtered out (not emitted). +/// Regression test for CodeRabbit review on PR #7589. +#[tokio::test] +async fn test_named_tool_with_parser_wrong_tool_is_filtered() { + // Model emits "search" but we required "get_weather" + let hermes_wrong_tool = + "\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n"; + + let chunks = vec![ + make_text_chunk(hermes_wrong_tool, false), + make_text_chunk("", true), + ]; + + let responses = apply_jail_named_with_parser(chunks, "hermes", "get_weather").await; + + // No response should contain a tool call for the wrong tool + for r in &responses { + if let Some(choice) = r.choices.first() { + if let Some(tool_calls) = &choice.delta.tool_calls { + for tc in tool_calls { + let name = tc + .function + .as_ref() + .and_then(|f| f.name.as_deref()) + .unwrap_or(""); + assert_ne!( + name, "search", + "wrong tool 'search' should have been filtered by named_tool_filter" + ); + } + } + } + } +} From 07ab42aef73ef012f108acb72012924ce84f08d7 Mon Sep 17 00:00:00 2001 From: Asad Shahid Date: Tue, 24 Mar 2026 23:52:24 +0000 Subject: [PATCH 3/6] test: exercise apply_tool_calling_jail in regression test (#6821) Address CodeRabbit review: test now calls through the preprocessor decision path instead of constructing JailedStream directly, ensuring the test catches regressions in the tool_choice=required + parser logic. Signed-off-by: Asad Shahid --- lib/llm/tests/test_jail.rs | 20 +++++++++++--------- lib/llm/tests/tool_choice.rs | 27 +++++++++++++++++---------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index a6169ffb0d6..12559cdbc83 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -1,9 +1,11 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +use dynamo_llm::preprocessor::OpenAIPreprocessor; use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse; use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream; use dynamo_protocols::types::{ - ChatChoiceStream, ChatCompletionStreamResponseDelta, CompletionUsage, FinishReason, Role, + ChatChoiceStream, ChatCompletionStreamResponseDelta, ChatCompletionToolChoiceOption, + CompletionUsage, FinishReason, Role, }; use dynamo_runtime::protocols::annotated::Annotated; @@ -3101,20 +3103,20 @@ fahrenheit "#; - // Build jail the same way apply_tool_calling_jail would when a - // parser is configured together with tool_choice=required. - // After the fix, this uses marker-based mode (not Immediate). - let jail = JailedStream::builder() - .tool_call_parser("qwen3_coder".to_string()) - .build(); - let input_chunks = vec![test_utils::create_mock_response_chunk( xml_output.to_string(), 0, )]; let input_stream = stream::iter(input_chunks); - let results: Vec<_> = jail.apply_with_finish_reason(input_stream).collect().await; + let results: Vec<_> = OpenAIPreprocessor::apply_tool_calling_jail( + Some("qwen3_coder".to_string()), + Some(ChatCompletionToolChoiceOption::Required), + None, + input_stream, + ) + .collect() + .await; // Should have parsed a tool call, not leaked raw XML as content let tool_call_count: usize = results diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs index 91d8dd02ac8..ea201ec731c 100644 --- a/lib/llm/tests/tool_choice.rs +++ b/lib/llm/tests/tool_choice.rs @@ -460,8 +460,11 @@ fn test_no_tool_choice_outputs_normal_text() { // --------------------------------------------------------------------------- /// Build a raw streaming response chunk with arbitrary text content. -fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse { - use dynamo_async_openai::types::{ +fn make_text_chunk( + text: &str, + finish: bool, +) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse { + use dynamo_protocols::types::{ ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role, }; #[allow(deprecated)] @@ -478,7 +481,7 @@ fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::c reasoning_content: None, }, finish_reason: if finish { - Some(dynamo_async_openai::types::FinishReason::Stop) + Some(dynamo_protocols::types::FinishReason::Stop) } else { None }, @@ -497,7 +500,9 @@ fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::c /// Apply jail with both a tool_call_parser and a named_tool_filter, returning all chunks. async fn apply_jail_named_with_parser( - chunks: Vec, + chunks: Vec< + dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse, + >, parser: &str, named_tool: &str, ) -> Vec { @@ -518,10 +523,14 @@ async fn apply_jail_named_with_parser( .tool_call_parser(parser) .named_tool_filter(named_tool) .build(); - + out.filter_map(|ann| async move { ann.data }) + .collect() + .await let out = jail.apply_with_finish_reason(input); tokio::pin!(out); - out.filter_map(|ann| async move { ann.data }).collect().await + out.filter_map(|ann| async move { ann.data }) + .collect() + .await } /// When tool_choice=named, a tool_call_parser is configured, and the model emits @@ -529,8 +538,7 @@ async fn apply_jail_named_with_parser( #[tokio::test] async fn test_named_tool_with_parser_correct_tool_passes() { // Hermes format: {"name":"get_weather","arguments":{...}}\n - let hermes_payload = - "\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n"; + let hermes_payload = "\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n"; let chunks = vec![ make_text_chunk(hermes_payload, false), @@ -569,8 +577,7 @@ async fn test_named_tool_with_parser_correct_tool_passes() { #[tokio::test] async fn test_named_tool_with_parser_wrong_tool_is_filtered() { // Model emits "search" but we required "get_weather" - let hermes_wrong_tool = - "\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n"; + let hermes_wrong_tool = "\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n"; let chunks = vec![ make_text_chunk(hermes_wrong_tool, false), From 86967104ef38589f752a62f93e8232f16dde9509 Mon Sep 17 00:00:00 2001 From: Asad Shahid Date: Wed, 25 Mar 2026 07:57:52 +0000 Subject: [PATCH 4/6] fix(tests): remove spurious filter_map block before let binding in apply_jail_named_with_parser The CodeRabbit fix commit (92cc8778) accidentally duplicated the `out.filter_map(...).collect().await` expression before the `let out` binding, causing a syntax error (expected `;`, found keyword `let` at line 524). Remove the stray 3-line block so the function compiles. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Asad Shahid --- lib/llm/tests/test_jail.rs | 9 ++-- lib/llm/tests/tool_choice.rs | 86 ++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index 12559cdbc83..a02025e0a0a 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -3123,9 +3123,12 @@ fahrenheit .iter() .map(|r| { r.data.as_ref().map_or(0, |d| { - d.choices + d.inner + .choices .iter() - .map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len())) + .map(|c: &ChatChoiceStream| { + c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()) + }) .sum::() }) }) @@ -3140,7 +3143,7 @@ fahrenheit // Verify the tool call was parsed correctly for r in &results { if let Some(data) = &r.data { - for choice in &data.choices { + for choice in &data.inner.choices { if let Some(tool_calls) = &choice.delta.tool_calls { for tc in tool_calls { assert_eq!( diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs index ea201ec731c..80f9b91828c 100644 --- a/lib/llm/tests/tool_choice.rs +++ b/lib/llm/tests/tool_choice.rs @@ -469,31 +469,33 @@ fn make_text_chunk( }; #[allow(deprecated)] dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse { - id: "test-named-parser".to_string(), - choices: vec![ChatChoiceStream { - index: 0, - delta: ChatCompletionStreamResponseDelta { - role: Some(Role::Assistant), - content: Some(ChatCompletionMessageContent::Text(text.to_string())), - tool_calls: None, - function_call: None, - refusal: None, - reasoning_content: None, - }, - finish_reason: if finish { - Some(dynamo_protocols::types::FinishReason::Stop) - } else { - None - }, - stop_reason: None, - logprobs: None, - }], - created: 1234567890, - model: "test-model".to_string(), - system_fingerprint: None, - object: "chat.completion.chunk".to_string(), - usage: None, - service_tier: None, + inner: dynamo_protocols::types::CreateChatCompletionStreamResponse { + id: "test-named-parser".to_string(), + choices: vec![ChatChoiceStream { + index: 0, + delta: ChatCompletionStreamResponseDelta { + role: Some(Role::Assistant), + content: Some(ChatCompletionMessageContent::Text(text.to_string())), + tool_calls: None, + function_call: None, + refusal: None, + reasoning_content: None, + }, + finish_reason: if finish { + Some(dynamo_protocols::types::FinishReason::Stop) + } else { + None + }, + stop_reason: None, + logprobs: None, + }], + created: 1234567890, + model: "test-model".to_string(), + system_fingerprint: None, + object: "chat.completion.chunk".to_string(), + usage: None, + service_tier: None, + }, nvext: None, } } @@ -523,9 +525,6 @@ async fn apply_jail_named_with_parser( .tool_call_parser(parser) .named_tool_filter(named_tool) .build(); - out.filter_map(|ann| async move { ann.data }) - .collect() - .await let out = jail.apply_with_finish_reason(input); tokio::pin!(out); out.filter_map(|ann| async move { ann.data }) @@ -551,14 +550,15 @@ async fn test_named_tool_with_parser_correct_tool_passes() { let tool_call_response = responses .iter() .find(|r| { - r.choices + r.inner + .choices .first() .and_then(|c| c.delta.tool_calls.as_ref()) .is_some() }) .expect("expected a response with tool calls for the correct named tool"); - let tool_calls = tool_call_response.choices[0] + let tool_calls = tool_call_response.inner.choices[0] .delta .tool_calls .as_ref() @@ -588,19 +588,19 @@ async fn test_named_tool_with_parser_wrong_tool_is_filtered() { // No response should contain a tool call for the wrong tool for r in &responses { - if let Some(choice) = r.choices.first() { - if let Some(tool_calls) = &choice.delta.tool_calls { - for tc in tool_calls { - let name = tc - .function - .as_ref() - .and_then(|f| f.name.as_deref()) - .unwrap_or(""); - assert_ne!( - name, "search", - "wrong tool 'search' should have been filtered by named_tool_filter" - ); - } + if let Some(choice) = r.inner.choices.first() + && let Some(tool_calls) = &choice.delta.tool_calls + { + for tc in tool_calls { + let name = tc + .function + .as_ref() + .and_then(|f| f.name.as_deref()) + .unwrap_or(""); + assert_ne!( + name, "search", + "wrong tool 'search' should have been filtered by named_tool_filter" + ); } } } From b202435fc9c2cb60d27ed1cdb80ee6b0063b2107 Mon Sep 17 00:00:00 2001 From: Vasilis Vagias Date: Tue, 31 Mar 2026 16:18:08 -0500 Subject: [PATCH 5/6] fix: preserve Stop finish_reason for tool_choice=named through MarkerBased path When named_tool_name is set (tool_choice=named + parser), the finish_reason should stay as Stop per OpenAI spec, matching the behavior of the Immediate/SingleObject path. --- lib/llm/src/protocols/openai/chat_completions/jail.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/llm/src/protocols/openai/chat_completions/jail.rs b/lib/llm/src/protocols/openai/chat_completions/jail.rs index c6bddcac658..1402b9929e4 100644 --- a/lib/llm/src/protocols/openai/chat_completions/jail.rs +++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs @@ -495,8 +495,9 @@ impl JailedStream { S: Stream> + Send + 'static, { let jail_mode = self.jail_mode.clone(); + let named_tool_active = self.named_tool_name.is_some(); let jailed_stream = self.apply(stream); - JailedStream::fix_finish_reason(jailed_stream, jail_mode) + JailedStream::fix_finish_reason(jailed_stream, jail_mode, named_tool_active) } /// Apply the jail transformation to a stream of chat completion responses @@ -1038,6 +1039,7 @@ impl JailedStream { fn fix_finish_reason( input_stream: S, jail_mode: JailMode, + named_tool_active: bool, ) -> impl Stream> + Send where S: Stream> + Send + 'static, @@ -1066,10 +1068,10 @@ impl JailedStream { match &jail_mode { JailMode::MarkerBased => { - // Traditional: if tool calls emitted, change to ToolCalls - if has_tool_calls { + if has_tool_calls && !named_tool_active { choice.finish_reason = Some(FinishReason::ToolCalls); } + // When named_tool_active, keep Stop (OpenAI spec for tool_choice=named) } JailMode::Immediate { format } => { // tool_choice mode: apply specific finish_reason logic From ba0a851d179d3047782001a4f6e4028118bf5083 Mon Sep 17 00:00:00 2001 From: Vasilis Vagias Date: Wed, 1 Apr 2026 11:43:37 -0500 Subject: [PATCH 6/6] fix(tests): Named() type mismatch and clippy manual_contains lint Signed-off-by: Vasilis Vagias --- lib/llm/tests/test_jail.rs | 103 +++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs index a02025e0a0a..45f925b5674 100644 --- a/lib/llm/tests/test_jail.rs +++ b/lib/llm/tests/test_jail.rs @@ -3166,4 +3166,107 @@ fahrenheit } } } + + /// Test for tool_choice=named with qwen3_coder parser and named_tool_filter. + /// + /// When tool_choice=named is used with a specific tool_name, the + /// preprocessor decision logic should apply the named_tool_filter to ensure + /// only the requested tool is parsed, even if the model emits other tools. + #[tokio::test] + async fn test_tool_choice_named_with_qwen3_coder_parser() { + // Simulate qwen3_coder XML output for a single tool call + let xml_output = r#" + + +San Francisco + + +fahrenheit + + +"#; + + let input_chunks = vec![ + test_utils::create_mock_response_chunk(xml_output.to_string(), 0), + test_utils::create_final_response_chunk(0), + ]; + + let input_stream = stream::iter(input_chunks); + + // Apply tool_choice=named for get_weather + let results: Vec<_> = OpenAIPreprocessor::apply_tool_calling_jail( + Some("qwen3_coder".to_string()), + Some(ChatCompletionToolChoiceOption::Named( + "get_weather".to_string().into(), + )), + None, + input_stream, + ) + .collect() + .await; + + // Should have parsed the named tool call + let tool_call_count: usize = results + .iter() + .map(|r| { + r.data.as_ref().map_or(0, |d| { + d.inner + .choices + .iter() + .map(|c: &ChatChoiceStream| { + c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()) + }) + .sum::() + }) + }) + .sum(); + + assert!( + tool_call_count >= 1, + "tool_choice=named with qwen3_coder should produce at least one tool call, got {}", + tool_call_count + ); + + // Verify the tool call was parsed correctly and matches the named tool + for r in &results { + if let Some(data) = &r.data { + for choice in &data.inner.choices { + if let Some(tool_calls) = &choice.delta.tool_calls { + for tc in tool_calls { + assert_eq!( + tc.function.as_ref().unwrap().name.as_deref(), + Some("get_weather"), + "Tool call name should match the named tool choice" + ); + } + } + // Content should be empty, not raw XML + if let Some(content) = &choice.delta.content { + let text = test_utils::extract_text(content); + assert!( + !text.contains(""), + "Raw XML should not leak as content, got: {}", + text + ); + } + } + } + } + + // Verify finish_reason is Stop (not ToolCalls) for named tool choice + let finish_reasons: Vec<_> = results + .iter() + .filter_map(|r| { + r.data + .as_ref() + .and_then(|d| d.inner.choices.first().and_then(|c| c.finish_reason)) + }) + .collect(); + + // For tool_choice=named, finish_reason should be Stop (OpenAI spec) + assert!( + finish_reasons.contains(&FinishReason::Stop), + "tool_choice=named should have Stop finish reason" + ); + } }