From c3e516397d9a39e3eb53a81456755f2e929810c7 Mon Sep 17 00:00:00 2001
From: Asad Shahid <asad.shahid@berkeley.edu>
Date: Mon, 23 Mar 2026 10:11:06 +0000
Subject: [PATCH 1/6] fix: tool_choice=required bypasses format-specific
 parsers (#6821)

When tool_choice is 'required' or 'named', the jail builder was
unconditionally set to Immediate JSON mode, which can only parse raw
JSON output. This meant format-specific parsers like qwen3_coder (XML),
pythonic, harmony, etc. were never invoked, causing raw markup to leak
as output_text instead of being parsed into structured function_call
items.

Root cause:
In apply_tool_calling_jail(), the match on tool_choice set
tool_choice_required()/tool_choice_named() on the builder WITHOUT also
setting the tool_call_parser. The Immediate jail mode only understands
JSON ({...} or [{...}]), so any model that emits non-JSON tool call
format (XML <tool_call>...</tool_call>, pythonic, DSML, etc.) was
silently broken under tool_choice=required.

Fix:
When a tool_call_parser is configured, always use marker-based jail
mode regardless of tool_choice value. This ensures the format-specific
parser is invoked to detect and extract tool calls. Immediate JSON
mode is preserved as a fallback for required/named when NO parser is
configured (the model is expected to emit raw JSON in that case).

Affected parsers: qwen3_coder, nemotron_nano, pythonic, harmony,
dsml, glm47, kimi_k2, and any future non-JSON parser.

Testing:
- Added regression test: tool_choice_required_with_qwen3_coder_parser
- Existing tool_choice tests for JSON-only (no parser) paths unchanged

Fixes: #6821
Signed-off-by: Asad Shahid <asad.shahid@berkeley.edu>
---
 lib/llm/src/preprocessor.rs | 25 ++++++++++--
 lib/llm/tests/test_jail.rs  | 81 +++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs
index 4413e472534..a4c2adb6686 100644
--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -1114,14 +1114,31 @@ impl OpenAIPreprocessor {
         }
 
         // Configure jail based on tool_choice
+        //
+        // When a tool_call_parser is configured, always use marker-based mode
+        // so that format-specific parsers (e.g. qwen3_coder XML) are invoked.
+        // Immediate JSON mode is only a fallback for required/named when no
+        // parser exists (the model is expected to emit raw JSON in that case).
         match tool_choice {
             Some(ChatCompletionToolChoiceOption::Named(named)) => {
-                // Immediate jail mode for named tool choice
-                builder = builder.tool_choice_named(named.function.name.clone());
+                if let Some(parser) = tool_call_parser {
+                    // Parser-aware path: use marker-based jail so the parser
+                    // handles format-specific output (XML, pythonic, etc.).
+                    builder = builder.tool_call_parser(parser);
+                } else {
+                    // No parser: fall back to Immediate JSON jail mode.
+                    builder = builder.tool_choice_named(named.function.name.clone());
+                }
             }
             Some(ChatCompletionToolChoiceOption::Required) => {
-                // Immediate jail mode for required tool choice
-                builder = builder.tool_choice_required();
+                if let Some(parser) = tool_call_parser {
+                    // Parser-aware path: use marker-based jail so the parser
+                    // handles format-specific output (XML, pythonic, etc.).
+                    builder = builder.tool_call_parser(parser);
+                } else {
+                    // No parser: fall back to Immediate JSON jail mode.
+                    builder = builder.tool_choice_required();
+                }
             }
             Some(ChatCompletionToolChoiceOption::Auto)
             | Some(ChatCompletionToolChoiceOption::None)
diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs
index e9dea8b36b7..a6169ffb0d6 100644
--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
@@ -3080,4 +3080,85 @@ mod parallel_jail_tests {
             "Should have no tool calls for empty array"
         );
     }
+
+    /// Regression test for #6821: tool_choice=required with qwen3_coder parser.
+    ///
+    /// When tool_choice=required AND a tool_call_parser (e.g. qwen3_coder) is
+    /// configured, the jail must use marker-based mode so the parser handles the
+    /// XML output.  Previously this fell through to Immediate JSON mode which
+    /// could not parse qwen3_coder XML, causing raw XML to leak as content.
+    #[tokio::test]
+    async fn test_tool_choice_required_with_qwen3_coder_parser() {
+        // Simulate qwen3_coder XML output for a single tool call
+        let xml_output = r#"<tool_call>
+<function=get_weather>
+<parameter=city>
+San Francisco
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>"#;
+
+        // Build jail the same way apply_tool_calling_jail would when a
+        // parser is configured together with tool_choice=required.
+        // After the fix, this uses marker-based mode (not Immediate).
+        let jail = JailedStream::builder()
+            .tool_call_parser("qwen3_coder".to_string())
+            .build();
+
+        let input_chunks = vec![test_utils::create_mock_response_chunk(
+            xml_output.to_string(),
+            0,
+        )];
+
+        let input_stream = stream::iter(input_chunks);
+        let results: Vec<_> = jail.apply_with_finish_reason(input_stream).collect().await;
+
+        // Should have parsed a tool call, not leaked raw XML as content
+        let tool_call_count: usize = results
+            .iter()
+            .map(|r| {
+                r.data.as_ref().map_or(0, |d| {
+                    d.choices
+                        .iter()
+                        .map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
+                        .sum::<usize>()
+                })
+            })
+            .sum();
+
+        assert!(
+            tool_call_count >= 1,
+            "tool_choice=required with qwen3_coder should produce at least one tool call, got {}",
+            tool_call_count
+        );
+
+        // Verify the tool call was parsed correctly
+        for r in &results {
+            if let Some(data) = &r.data {
+                for choice in &data.choices {
+                    if let Some(tool_calls) = &choice.delta.tool_calls {
+                        for tc in tool_calls {
+                            assert_eq!(
+                                tc.function.as_ref().unwrap().name.as_deref(),
+                                Some("get_weather"),
+                                "Tool call name should be 'get_weather'"
+                            );
+                        }
+                    }
+                    // Content should be empty, not raw XML
+                    if let Some(content) = &choice.delta.content {
+                        let text = test_utils::extract_text(content);
+                        assert!(
+                            !text.contains("<tool_call>"),
+                            "Raw XML should not leak as content, got: {}",
+                            text
+                        );
+                    }
+                }
+            }
+        }
+    }
 }

From acac75b24d018c6a5968392b3dbf5cbb12582555 Mon Sep 17 00:00:00 2001
From: Asad Shahid <asad.shahid@berkeley.edu>
Date: Tue, 24 Mar 2026 23:44:49 +0000
Subject: [PATCH 2/6] fix: enforce named tool filter when tool_call_parser is
 set (#7589)

When tool_choice=named and a tool_call_parser is configured, the previous
code set builder.tool_call_parser(parser) but never called
tool_choice_named(), so named-tool enforcement was silently dropped.

Fix:
- Add named_tool_name: Option<String> to JailedStream and JailedStreamBuilder
- Add JailedStreamBuilder::named_tool_filter(tool_name) method
- In preprocessor.rs Named+parser branch: call both .tool_call_parser(parser)
  and .named_tool_filter(named.function.name.clone())
- In create_tool_call_choice MarkerBased path: filter parsed ToolCalls
  against named_tool_name before emission; if all calls are for the wrong
  tool, return the accumulated content as a content choice instead

Acceptance criteria verified:
- tool_choice=named + parser uses marker-based parsing (unchanged)
- Only the named tool is accepted; other tool names are rejected/filtered
- tool_choice=auto and tool_choice=required paths are unchanged

Tests added:
- test_named_tool_with_parser_correct_tool_passes: hermes output for the
  correct tool passes through with the right name
- test_named_tool_with_parser_wrong_tool_is_filtered: hermes output for
  a different tool is filtered out before emission

Signed-off-by: Asad Shahid <asad.shahid@berkeley.edu>
---
 lib/llm/src/preprocessor.rs                   |   6 +-
 .../protocols/openai/chat_completions/jail.rs |  47 ++++++
 lib/llm/tests/tool_choice.rs                  | 144 ++++++++++++++++++
 3 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs
index a4c2adb6686..83a487cc976 100644
--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -1124,7 +1124,11 @@ impl OpenAIPreprocessor {
                 if let Some(parser) = tool_call_parser {
                     // Parser-aware path: use marker-based jail so the parser
                     // handles format-specific output (XML, pythonic, etc.).
-                    builder = builder.tool_call_parser(parser);
+                    // Also install a named-tool filter so that if the model emits
+                    // the wrong tool, the parsed call is rejected before emission.
+                    builder = builder
+                        .tool_call_parser(parser)
+                        .named_tool_filter(named.function.name.clone());
                 } else {
                     // No parser: fall back to Immediate JSON jail mode.
                     builder = builder.tool_choice_named(named.function.name.clone());
diff --git a/lib/llm/src/protocols/openai/chat_completions/jail.rs b/lib/llm/src/protocols/openai/chat_completions/jail.rs
index e551ba2e2b6..c6bddcac658 100644
--- a/lib/llm/src/protocols/openai/chat_completions/jail.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -470,6 +470,9 @@ pub struct JailedStream {
     jail_start_sequences: Vec<String>,
     jail_end_sequences: Vec<String>,
     tool_call_parser: Option<String>,
+    /// When set, only tool calls with this name are emitted (enforces tool_choice=named
+    /// when a tool_call_parser is active and the parser-aware MarkerBased path is used).
+    named_tool_name: Option<String>,
     tool_definitions: Option<Vec<dynamo_parsers::tool_calling::ToolDefinition>>,
     emission_mode: EmissionMode,
     marker_matcher: MarkerMatcher,
@@ -856,6 +859,37 @@ impl JailedStream {
                 if let Ok((tool_calls, normal_text)) = parse_result
                     && !tool_calls.is_empty()
                 {
+                    // If a named tool filter is set (tool_choice=named + parser path), reject
+                    // tool calls that don't match the required tool name.
+                    let tool_calls = if let Some(ref required_name) = self.named_tool_name {
+                        let filtered: Vec<_> = tool_calls
+                            .into_iter()
+                            .filter(|tc| tc.function.name == *required_name)
+                            .collect();
+                        if filtered.is_empty() {
+                            tracing::warn!(
+                                required = %required_name,
+                                "tool_choice=named: parser emitted no matching tool calls; dropping jail output"
+                            );
+                        }
+                        filtered
+                    } else {
+                        tool_calls
+                    };
+
+                    if tool_calls.is_empty() {
+                        // All parsed calls were for the wrong tool — return content choice
+                        return create_choice_stream(
+                            choice_index,
+                            Some(Role::Assistant),
+                            accumulated_content,
+                            None,
+                            base_choice.finish_reason,
+                            base_choice.stop_reason.clone(),
+                            base_choice.logprobs.clone(),
+                        );
+                    }
+
                     // Convert to streaming format
                     let tool_call_chunks: Vec<ChatCompletionMessageToolCallChunk> = tool_calls
                         .into_iter()
@@ -1070,6 +1104,9 @@ pub struct JailedStreamBuilder {
     jail_start_sequences: Vec<String>,
     jail_end_sequences: Vec<String>,
     tool_call_parser: Option<String>,
+    /// When set, only tool calls with this name are emitted (enforces tool_choice=named
+    /// when a tool_call_parser is active and the parser-aware MarkerBased path is used).
+    named_tool_name: Option<String>,
     tool_definitions: Option<Vec<dynamo_parsers::tool_calling::ToolDefinition>>,
     emission_mode: EmissionMode,
     jail_mode: JailMode,
@@ -1082,6 +1119,7 @@ impl JailedStreamBuilder {
             jail_start_sequences: Vec::new(),
             jail_end_sequences: Vec::new(),
             tool_call_parser: None,
+            named_tool_name: None,
             tool_definitions: None,
             emission_mode: EmissionMode::default(),
             jail_mode: JailMode::MarkerBased,
@@ -1126,6 +1164,14 @@ impl JailedStreamBuilder {
         self
     }
 
+    /// Constrain parsed output to a single named tool (for tool_choice=named + parser path).
+    /// When set, tool calls emitted by the parser that don't match `tool_name` are silently
+    /// filtered out, enforcing the named-tool contract even when the model emits the wrong tool.
+    pub fn named_tool_filter(mut self, tool_name: impl Into<String>) -> Self {
+        self.named_tool_name = Some(tool_name.into());
+        self
+    }
+
     /// Set the tool definitions for runtime validation and parsing
     pub fn tool_definitions(
         mut self,
@@ -1245,6 +1291,7 @@ impl JailedStreamBuilder {
             jail_start_sequences: self.jail_start_sequences,
             jail_end_sequences: self.jail_end_sequences,
             tool_call_parser: self.tool_call_parser,
+            named_tool_name: self.named_tool_name,
             tool_definitions: self.tool_definitions,
             emission_mode: self.emission_mode,
             marker_matcher,
diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs
index 6f069dd90bd..91d8dd02ac8 100644
--- a/lib/llm/tests/tool_choice.rs
+++ b/lib/llm/tests/tool_choice.rs
@@ -454,3 +454,147 @@ fn test_no_tool_choice_outputs_normal_text() {
     );
     assert!(response.inner.choices[0].delta.tool_calls.is_none());
 }
+
+// ---------------------------------------------------------------------------
+// tool_choice=named + tool_call_parser enforcement (CodeRabbit PR #7589)
+// ---------------------------------------------------------------------------
+
+/// Build a raw streaming response chunk with arbitrary text content.
+fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse {
+    use dynamo_async_openai::types::{
+        ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role,
+    };
+    #[allow(deprecated)]
+    dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse {
+        id: "test-named-parser".to_string(),
+        choices: vec![ChatChoiceStream {
+            index: 0,
+            delta: ChatCompletionStreamResponseDelta {
+                role: Some(Role::Assistant),
+                content: Some(ChatCompletionMessageContent::Text(text.to_string())),
+                tool_calls: None,
+                function_call: None,
+                refusal: None,
+                reasoning_content: None,
+            },
+            finish_reason: if finish {
+                Some(dynamo_async_openai::types::FinishReason::Stop)
+            } else {
+                None
+            },
+            stop_reason: None,
+            logprobs: None,
+        }],
+        created: 1234567890,
+        model: "test-model".to_string(),
+        system_fingerprint: None,
+        object: "chat.completion.chunk".to_string(),
+        usage: None,
+        service_tier: None,
+        nvext: None,
+    }
+}
+
+/// Apply jail with both a tool_call_parser and a named_tool_filter, returning all chunks.
+async fn apply_jail_named_with_parser(
+    chunks: Vec<dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse>,
+    parser: &str,
+    named_tool: &str,
+) -> Vec<dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse> {
+    use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream;
+    use dynamo_runtime::protocols::annotated::Annotated;
+    use futures::StreamExt;
+    use futures::stream;
+
+    let input = stream::iter(chunks.into_iter().map(|r| Annotated {
+        data: Some(r),
+        id: None,
+        event: None,
+        comment: None,
+        error: None,
+    }));
+
+    let jail = JailedStream::builder()
+        .tool_call_parser(parser)
+        .named_tool_filter(named_tool)
+        .build();
+
+    let out = jail.apply_with_finish_reason(input);
+    tokio::pin!(out);
+    out.filter_map(|ann| async move { ann.data }).collect().await
+}
+
+/// When tool_choice=named, a tool_call_parser is configured, and the model emits
+/// the **correct** tool, the parsed tool call must pass through with the right name.
+#[tokio::test]
+async fn test_named_tool_with_parser_correct_tool_passes() {
+    // Hermes format: <tool_call>{"name":"get_weather","arguments":{...}}\n</tool_call>
+    let hermes_payload =
+        "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n</tool_call>";
+
+    let chunks = vec![
+        make_text_chunk(hermes_payload, false),
+        make_text_chunk("", true), // final empty chunk with finish_reason
+    ];
+
+    let responses = apply_jail_named_with_parser(chunks, "hermes", "get_weather").await;
+
+    // Should have at least one response with tool calls
+    let tool_call_response = responses
+        .iter()
+        .find(|r| {
+            r.choices
+                .first()
+                .and_then(|c| c.delta.tool_calls.as_ref())
+                .is_some()
+        })
+        .expect("expected a response with tool calls for the correct named tool");
+
+    let tool_calls = tool_call_response.choices[0]
+        .delta
+        .tool_calls
+        .as_ref()
+        .unwrap();
+    assert_eq!(tool_calls.len(), 1, "expected exactly one tool call");
+    assert_eq!(
+        tool_calls[0].function.as_ref().unwrap().name.as_deref(),
+        Some("get_weather"),
+        "tool call name should be get_weather"
+    );
+}
+
+/// When tool_choice=named, a tool_call_parser is configured, and the model emits
+/// the **wrong** tool, the parsed call must be filtered out (not emitted).
+/// Regression test for CodeRabbit review on PR #7589.
+#[tokio::test]
+async fn test_named_tool_with_parser_wrong_tool_is_filtered() {
+    // Model emits "search" but we required "get_weather"
+    let hermes_wrong_tool =
+        "<tool_call>\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n</tool_call>";
+
+    let chunks = vec![
+        make_text_chunk(hermes_wrong_tool, false),
+        make_text_chunk("", true),
+    ];
+
+    let responses = apply_jail_named_with_parser(chunks, "hermes", "get_weather").await;
+
+    // No response should contain a tool call for the wrong tool
+    for r in &responses {
+        if let Some(choice) = r.choices.first() {
+            if let Some(tool_calls) = &choice.delta.tool_calls {
+                for tc in tool_calls {
+                    let name = tc
+                        .function
+                        .as_ref()
+                        .and_then(|f| f.name.as_deref())
+                        .unwrap_or("");
+                    assert_ne!(
+                        name, "search",
+                        "wrong tool 'search' should have been filtered by named_tool_filter"
+                    );
+                }
+            }
+        }
+    }
+}

From 07ab42aef73ef012f108acb72012924ce84f08d7 Mon Sep 17 00:00:00 2001
From: Asad Shahid <asad.shahid@berkeley.edu>
Date: Tue, 24 Mar 2026 23:52:24 +0000
Subject: [PATCH 3/6] test: exercise apply_tool_calling_jail in regression test
 (#6821)

Address CodeRabbit review: test now calls through the preprocessor
decision path instead of constructing JailedStream directly, ensuring
the test catches regressions in the tool_choice=required + parser logic.

Signed-off-by: Asad Shahid <asad.shahid@berkeley.edu>
---
 lib/llm/tests/test_jail.rs   | 20 +++++++++++---------
 lib/llm/tests/tool_choice.rs | 27 +++++++++++++++++----------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs
index a6169ffb0d6..12559cdbc83 100644
--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
@@ -1,9 +1,11 @@
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
+use dynamo_llm::preprocessor::OpenAIPreprocessor;
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
 use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream;
 use dynamo_protocols::types::{
-    ChatChoiceStream, ChatCompletionStreamResponseDelta, CompletionUsage, FinishReason, Role,
+    ChatChoiceStream, ChatCompletionStreamResponseDelta, ChatCompletionToolChoiceOption,
+    CompletionUsage, FinishReason, Role,
 };
 use dynamo_runtime::protocols::annotated::Annotated;
 
@@ -3101,20 +3103,20 @@ fahrenheit
 </function>
 </tool_call>"#;
 
-        // Build jail the same way apply_tool_calling_jail would when a
-        // parser is configured together with tool_choice=required.
-        // After the fix, this uses marker-based mode (not Immediate).
-        let jail = JailedStream::builder()
-            .tool_call_parser("qwen3_coder".to_string())
-            .build();
-
         let input_chunks = vec![test_utils::create_mock_response_chunk(
             xml_output.to_string(),
             0,
         )];
 
         let input_stream = stream::iter(input_chunks);
-        let results: Vec<_> = jail.apply_with_finish_reason(input_stream).collect().await;
+        let results: Vec<_> = OpenAIPreprocessor::apply_tool_calling_jail(
+            Some("qwen3_coder".to_string()),
+            Some(ChatCompletionToolChoiceOption::Required),
+            None,
+            input_stream,
+        )
+        .collect()
+        .await;
 
         // Should have parsed a tool call, not leaked raw XML as content
         let tool_call_count: usize = results
diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs
index 91d8dd02ac8..ea201ec731c 100644
--- a/lib/llm/tests/tool_choice.rs
+++ b/lib/llm/tests/tool_choice.rs
@@ -460,8 +460,11 @@ fn test_no_tool_choice_outputs_normal_text() {
 // ---------------------------------------------------------------------------
 
 /// Build a raw streaming response chunk with arbitrary text content.
-fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse {
-    use dynamo_async_openai::types::{
+fn make_text_chunk(
+    text: &str,
+    finish: bool,
+) -> dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse {
+    use dynamo_protocols::types::{
         ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role,
     };
     #[allow(deprecated)]
@@ -478,7 +481,7 @@ fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::c
                 reasoning_content: None,
             },
             finish_reason: if finish {
-                Some(dynamo_async_openai::types::FinishReason::Stop)
+                Some(dynamo_protocols::types::FinishReason::Stop)
             } else {
                 None
             },
@@ -497,7 +500,9 @@ fn make_text_chunk(text: &str, finish: bool) -> dynamo_llm::protocols::openai::c
 
 /// Apply jail with both a tool_call_parser and a named_tool_filter, returning all chunks.
 async fn apply_jail_named_with_parser(
-    chunks: Vec<dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse>,
+    chunks: Vec<
+        dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse,
+    >,
     parser: &str,
     named_tool: &str,
 ) -> Vec<dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse> {
@@ -518,10 +523,14 @@ async fn apply_jail_named_with_parser(
         .tool_call_parser(parser)
         .named_tool_filter(named_tool)
         .build();
-
+    out.filter_map(|ann| async move { ann.data })
+        .collect()
+        .await
     let out = jail.apply_with_finish_reason(input);
     tokio::pin!(out);
-    out.filter_map(|ann| async move { ann.data }).collect().await
+    out.filter_map(|ann| async move { ann.data })
+        .collect()
+        .await
 }
 
 /// When tool_choice=named, a tool_call_parser is configured, and the model emits
@@ -529,8 +538,7 @@ async fn apply_jail_named_with_parser(
 #[tokio::test]
 async fn test_named_tool_with_parser_correct_tool_passes() {
     // Hermes format: <tool_call>{"name":"get_weather","arguments":{...}}\n</tool_call>
-    let hermes_payload =
-        "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n</tool_call>";
+    let hermes_payload = "<tool_call>\n{\"name\": \"get_weather\", \"arguments\": {\"location\": \"Paris\"}}\n</tool_call>";
 
     let chunks = vec![
         make_text_chunk(hermes_payload, false),
@@ -569,8 +577,7 @@ async fn test_named_tool_with_parser_correct_tool_passes() {
 #[tokio::test]
 async fn test_named_tool_with_parser_wrong_tool_is_filtered() {
     // Model emits "search" but we required "get_weather"
-    let hermes_wrong_tool =
-        "<tool_call>\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n</tool_call>";
+    let hermes_wrong_tool = "<tool_call>\n{\"name\": \"search\", \"arguments\": {\"query\": \"Paris weather\"}}\n</tool_call>";
 
     let chunks = vec![
         make_text_chunk(hermes_wrong_tool, false),

From 86967104ef38589f752a62f93e8232f16dde9509 Mon Sep 17 00:00:00 2001
From: Asad Shahid <asad.shahid@berkeley.edu>
Date: Wed, 25 Mar 2026 07:57:52 +0000
Subject: [PATCH 4/6] fix(tests): remove spurious filter_map block before let
 binding in apply_jail_named_with_parser

The CodeRabbit fix commit (92cc8778) accidentally duplicated the
`out.filter_map(...).collect().await` expression before the `let out`
binding, causing a syntax error (expected `;`, found keyword `let` at
line 524). Remove the stray 3-line block so the function compiles.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Asad Shahid <asad.shahid@berkeley.edu>
---
 lib/llm/tests/test_jail.rs   |  9 ++--
 lib/llm/tests/tool_choice.rs | 86 ++++++++++++++++++------------------
 2 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs
index 12559cdbc83..a02025e0a0a 100644
--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
@@ -3123,9 +3123,12 @@ fahrenheit
             .iter()
             .map(|r| {
                 r.data.as_ref().map_or(0, |d| {
-                    d.choices
+                    d.inner
+                        .choices
                         .iter()
-                        .map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
+                        .map(|c: &ChatChoiceStream| {
+                            c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len())
+                        })
                         .sum::<usize>()
                 })
             })
@@ -3140,7 +3143,7 @@ fahrenheit
         // Verify the tool call was parsed correctly
         for r in &results {
             if let Some(data) = &r.data {
-                for choice in &data.choices {
+                for choice in &data.inner.choices {
                     if let Some(tool_calls) = &choice.delta.tool_calls {
                         for tc in tool_calls {
                             assert_eq!(
diff --git a/lib/llm/tests/tool_choice.rs b/lib/llm/tests/tool_choice.rs
index ea201ec731c..80f9b91828c 100644
--- a/lib/llm/tests/tool_choice.rs
+++ b/lib/llm/tests/tool_choice.rs
@@ -469,31 +469,33 @@ fn make_text_chunk(
     };
     #[allow(deprecated)]
     dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse {
-        id: "test-named-parser".to_string(),
-        choices: vec![ChatChoiceStream {
-            index: 0,
-            delta: ChatCompletionStreamResponseDelta {
-                role: Some(Role::Assistant),
-                content: Some(ChatCompletionMessageContent::Text(text.to_string())),
-                tool_calls: None,
-                function_call: None,
-                refusal: None,
-                reasoning_content: None,
-            },
-            finish_reason: if finish {
-                Some(dynamo_protocols::types::FinishReason::Stop)
-            } else {
-                None
-            },
-            stop_reason: None,
-            logprobs: None,
-        }],
-        created: 1234567890,
-        model: "test-model".to_string(),
-        system_fingerprint: None,
-        object: "chat.completion.chunk".to_string(),
-        usage: None,
-        service_tier: None,
+        inner: dynamo_protocols::types::CreateChatCompletionStreamResponse {
+            id: "test-named-parser".to_string(),
+            choices: vec![ChatChoiceStream {
+                index: 0,
+                delta: ChatCompletionStreamResponseDelta {
+                    role: Some(Role::Assistant),
+                    content: Some(ChatCompletionMessageContent::Text(text.to_string())),
+                    tool_calls: None,
+                    function_call: None,
+                    refusal: None,
+                    reasoning_content: None,
+                },
+                finish_reason: if finish {
+                    Some(dynamo_protocols::types::FinishReason::Stop)
+                } else {
+                    None
+                },
+                stop_reason: None,
+                logprobs: None,
+            }],
+            created: 1234567890,
+            model: "test-model".to_string(),
+            system_fingerprint: None,
+            object: "chat.completion.chunk".to_string(),
+            usage: None,
+            service_tier: None,
+        },
         nvext: None,
     }
 }
@@ -523,9 +525,6 @@ async fn apply_jail_named_with_parser(
         .tool_call_parser(parser)
         .named_tool_filter(named_tool)
         .build();
-    out.filter_map(|ann| async move { ann.data })
-        .collect()
-        .await
     let out = jail.apply_with_finish_reason(input);
     tokio::pin!(out);
     out.filter_map(|ann| async move { ann.data })
@@ -551,14 +550,15 @@ async fn test_named_tool_with_parser_correct_tool_passes() {
     let tool_call_response = responses
         .iter()
         .find(|r| {
-            r.choices
+            r.inner
+                .choices
                 .first()
                 .and_then(|c| c.delta.tool_calls.as_ref())
                 .is_some()
         })
         .expect("expected a response with tool calls for the correct named tool");
 
-    let tool_calls = tool_call_response.choices[0]
+    let tool_calls = tool_call_response.inner.choices[0]
         .delta
         .tool_calls
         .as_ref()
@@ -588,19 +588,19 @@ async fn test_named_tool_with_parser_wrong_tool_is_filtered() {
 
     // No response should contain a tool call for the wrong tool
     for r in &responses {
-        if let Some(choice) = r.choices.first() {
-            if let Some(tool_calls) = &choice.delta.tool_calls {
-                for tc in tool_calls {
-                    let name = tc
-                        .function
-                        .as_ref()
-                        .and_then(|f| f.name.as_deref())
-                        .unwrap_or("");
-                    assert_ne!(
-                        name, "search",
-                        "wrong tool 'search' should have been filtered by named_tool_filter"
-                    );
-                }
+        if let Some(choice) = r.inner.choices.first()
+            && let Some(tool_calls) = &choice.delta.tool_calls
+        {
+            for tc in tool_calls {
+                let name = tc
+                    .function
+                    .as_ref()
+                    .and_then(|f| f.name.as_deref())
+                    .unwrap_or("");
+                assert_ne!(
+                    name, "search",
+                    "wrong tool 'search' should have been filtered by named_tool_filter"
+                );
             }
         }
     }

From b202435fc9c2cb60d27ed1cdb80ee6b0063b2107 Mon Sep 17 00:00:00 2001
From: Vasilis Vagias <vvagias@nvidia.com>
Date: Tue, 31 Mar 2026 16:18:08 -0500
Subject: [PATCH 5/6] fix: preserve Stop finish_reason for tool_choice=named
 through MarkerBased path

When named_tool_name is set (tool_choice=named + parser), the
finish_reason should stay as Stop per OpenAI spec, matching the
behavior of the Immediate/SingleObject path.
---
 lib/llm/src/protocols/openai/chat_completions/jail.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/llm/src/protocols/openai/chat_completions/jail.rs b/lib/llm/src/protocols/openai/chat_completions/jail.rs
index c6bddcac658..1402b9929e4 100644
--- a/lib/llm/src/protocols/openai/chat_completions/jail.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -495,8 +495,9 @@ impl JailedStream {
         S: Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>> + Send + 'static,
     {
         let jail_mode = self.jail_mode.clone();
+        let named_tool_active = self.named_tool_name.is_some();
         let jailed_stream = self.apply(stream);
-        JailedStream::fix_finish_reason(jailed_stream, jail_mode)
+        JailedStream::fix_finish_reason(jailed_stream, jail_mode, named_tool_active)
     }
 
     /// Apply the jail transformation to a stream of chat completion responses
@@ -1038,6 +1039,7 @@ impl JailedStream {
     fn fix_finish_reason<S>(
         input_stream: S,
         jail_mode: JailMode,
+        named_tool_active: bool,
     ) -> impl Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>> + Send
     where
         S: Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>> + Send + 'static,
@@ -1066,10 +1068,10 @@ impl JailedStream {
 
                                 match &jail_mode {
                                     JailMode::MarkerBased => {
-                                        // Traditional: if tool calls emitted, change to ToolCalls
-                                        if has_tool_calls {
+                                        if has_tool_calls && !named_tool_active {
                                             choice.finish_reason = Some(FinishReason::ToolCalls);
                                         }
+                                        // When named_tool_active, keep Stop (OpenAI spec for tool_choice=named)
                                     }
                                     JailMode::Immediate { format } => {
                                         // tool_choice mode: apply specific finish_reason logic

From ba0a851d179d3047782001a4f6e4028118bf5083 Mon Sep 17 00:00:00 2001
From: Vasilis Vagias <vvagias@nvidia.com>
Date: Wed, 1 Apr 2026 11:43:37 -0500
Subject: [PATCH 6/6] fix(tests): Named() type mismatch and clippy
 manual_contains lint

Signed-off-by: Vasilis Vagias <vvagias@nvidia.com>
---
 lib/llm/tests/test_jail.rs | 103 +++++++++++++++++++++++++++++++++++++
 1 file changed, 103 insertions(+)

diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs
index a02025e0a0a..45f925b5674 100644
--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
@@ -3166,4 +3166,107 @@ fahrenheit
             }
         }
     }
+
+    /// Test for tool_choice=named with qwen3_coder parser and named_tool_filter.
+    ///
+    /// When tool_choice=named is used with a specific tool_name, the
+    /// preprocessor decision logic should apply the named_tool_filter to ensure
+    /// only the requested tool is parsed, even if the model emits other tools.
+    #[tokio::test]
+    async fn test_tool_choice_named_with_qwen3_coder_parser() {
+        // Simulate qwen3_coder XML output for a single tool call
+        let xml_output = r#"<tool_call>
+<function=get_weather>
+<parameter=city>
+San Francisco
+</parameter>
+<parameter=unit>
+fahrenheit
+</parameter>
+</function>
+</tool_call>"#;
+
+        let input_chunks = vec![
+            test_utils::create_mock_response_chunk(xml_output.to_string(), 0),
+            test_utils::create_final_response_chunk(0),
+        ];
+
+        let input_stream = stream::iter(input_chunks);
+
+        // Apply tool_choice=named for get_weather
+        let results: Vec<_> = OpenAIPreprocessor::apply_tool_calling_jail(
+            Some("qwen3_coder".to_string()),
+            Some(ChatCompletionToolChoiceOption::Named(
+                "get_weather".to_string().into(),
+            )),
+            None,
+            input_stream,
+        )
+        .collect()
+        .await;
+
+        // Should have parsed the named tool call
+        let tool_call_count: usize = results
+            .iter()
+            .map(|r| {
+                r.data.as_ref().map_or(0, |d| {
+                    d.inner
+                        .choices
+                        .iter()
+                        .map(|c: &ChatChoiceStream| {
+                            c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len())
+                        })
+                        .sum::<usize>()
+                })
+            })
+            .sum();
+
+        assert!(
+            tool_call_count >= 1,
+            "tool_choice=named with qwen3_coder should produce at least one tool call, got {}",
+            tool_call_count
+        );
+
+        // Verify the tool call was parsed correctly and matches the named tool
+        for r in &results {
+            if let Some(data) = &r.data {
+                for choice in &data.inner.choices {
+                    if let Some(tool_calls) = &choice.delta.tool_calls {
+                        for tc in tool_calls {
+                            assert_eq!(
+                                tc.function.as_ref().unwrap().name.as_deref(),
+                                Some("get_weather"),
+                                "Tool call name should match the named tool choice"
+                            );
+                        }
+                    }
+                    // Content should be empty, not raw XML
+                    if let Some(content) = &choice.delta.content {
+                        let text = test_utils::extract_text(content);
+                        assert!(
+                            !text.contains("<tool_call>"),
+                            "Raw XML should not leak as content, got: {}",
+                            text
+                        );
+                    }
+                }
+            }
+        }
+
+        // Verify finish_reason is Stop (not ToolCalls) for named tool choice
+        let finish_reasons: Vec<_> = results
+            .iter()
+            .filter_map(|r| {
+                r.data
+                    .as_ref()
+                    .and_then(|d| d.inner.choices.first().and_then(|c| c.finish_reason))
+            })
+            .collect();
+
+        // For tool_choice=named, finish_reason should be Stop (OpenAI spec)
+        assert!(
+            finish_reasons.contains(&FinishReason::Stop),
+            "tool_choice=named should have Stop finish reason"
+        );
+    }
 }