From 68fc8b704ac3cb152e3a31676da3dc5d095bc5ce Mon Sep 17 00:00:00 2001
From: Keegan Mullaney <keeganmullaney@gmail.com>
Date: Sat, 4 Apr 2026 15:33:55 -0700
Subject: [PATCH 1/2] feat: add Gemma 4 tool call parser

Gemma 4 uses a unique tool calling format with special tokens for
delimiters instead of JSON quotes. This adds a state-machine parser
that converts the custom argument format to valid JSON.

- New gemma4_tool_parser.py with streaming support
- Auto-detection in auto_tool_parser.py (both non-streaming and streaming)
- 21 tests covering arg conversion, nested objects, arrays, hyphenated
  names, streaming, think tags, and auto-detection
- CLI --tool-call-parser gemma4 option

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_gemma4_parser.py                 | 237 ++++++++++++++++++++
 vllm_mlx/cli.py                             |   3 +-
 vllm_mlx/tool_parsers/__init__.py           |   3 +
 vllm_mlx/tool_parsers/auto_tool_parser.py   |  64 ++++--
 vllm_mlx/tool_parsers/gemma4_tool_parser.py | 219 ++++++++++++++++++
 5 files changed, 512 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_gemma4_parser.py
 create mode 100644 vllm_mlx/tool_parsers/gemma4_tool_parser.py

diff --git a/tests/test_gemma4_parser.py b/tests/test_gemma4_parser.py
new file mode 100644
index 000000000..acf499be2
--- /dev/null
+++ b/tests/test_gemma4_parser.py
@@ -0,0 +1,237 @@
+# SPDX-License-Identifier: Apache-2.0
+"""Tests for the Gemma 4 tool call parser."""
+
+import json
+
+from vllm_mlx.tool_parsers.gemma4_tool_parser import (
+    Gemma4ToolParser,
+    _gemma_args_to_json,
+)
+
+# The Gemma 4 escape token for quotes
+Q = '<|"|>'
+
+
+def test_args_to_json_simple_strings():
+    raw = f"location:{Q}San Francisco{Q},unit:{Q}celsius{Q}"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"location": "San Francisco", "unit": "celsius"}
+
+
+def test_args_to_json_numbers():
+    raw = f"temperature:15,humidity:0.75"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"temperature": 15, "humidity": 0.75}
+
+
+def test_args_to_json_booleans():
+    raw = f"enabled:true,verbose:false"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"enabled": True, "verbose": False}
+
+
+def test_args_to_json_mixed():
+    raw = f"name:{Q}test{Q},count:42,active:true"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"name": "test", "count": 42, "active": True}
+
+
+def test_args_to_json_nested_object():
+    raw = f"query:{Q}weather{Q},options:{{format:{Q}json{Q},verbose:true}}"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"query": "weather", "options": {"format": "json", "verbose": True}}
+
+
+def test_args_to_json_array():
+    raw = f"tags:[{Q}a{Q},{Q}b{Q},{Q}c{Q}]"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"tags": ["a", "b", "c"]}
+
+
+def test_args_to_json_string_with_special_chars():
+    raw = f'message:{Q}Hello, how are you? I\'m fine!{Q}'
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"message": "Hello, how are you? I'm fine!"}
+
+
+def test_args_to_json_string_with_embedded_quotes():
+    raw = f'message:{Q}She said "hello"{Q}'
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"message": 'She said "hello"'}
+
+
+def test_args_to_json_null():
+    raw = f"name:{Q}test{Q},value:null"
+    result = "{" + _gemma_args_to_json(raw) + "}"
+    parsed = json.loads(result)
+    assert parsed == {"name": "test", "value": None}
+
+
+def test_extract_single_tool_call():
+    parser = Gemma4ToolParser()
+    output = f"<|tool_call>call:get_weather{{location:{Q}San Francisco{Q},unit:{Q}celsius{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert len(result.tool_calls) == 1
+    tc = result.tool_calls[0]
+    assert tc["name"] == "get_weather"
+    args = json.loads(tc["arguments"])
+    assert args == {"location": "San Francisco", "unit": "celsius"}
+    assert tc["id"].startswith("call_")
+
+
+def test_extract_multiple_tool_calls():
+    parser = Gemma4ToolParser()
+    output = (
+        f"<|tool_call>call:get_weather{{location:{Q}SF{Q}}}<tool_call|>"
+        f"<|tool_call>call:get_time{{timezone:{Q}PST{Q}}}<tool_call|>"
+    )
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert len(result.tool_calls) == 2
+    assert result.tool_calls[0]["name"] == "get_weather"
+    assert result.tool_calls[1]["name"] == "get_time"
+
+
+def test_extract_tool_call_with_surrounding_text():
+    parser = Gemma4ToolParser()
+    output = f"Let me check the weather. <|tool_call>call:get_weather{{location:{Q}SF{Q}}}<tool_call|> Done."
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert len(result.tool_calls) == 1
+    assert result.content == "Let me check the weather.  Done."
+
+
+def test_no_tool_calls():
+    parser = Gemma4ToolParser()
+    output = "Just a regular response with no tool calls."
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is False
+    assert len(result.tool_calls) == 0
+    assert result.content == output
+
+
+def test_extract_with_think_tags():
+    parser = Gemma4ToolParser()
+    output = f"<think>I should check the weather</think><|tool_call>call:get_weather{{location:{Q}SF{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert len(result.tool_calls) == 1
+    assert result.tool_calls[0]["name"] == "get_weather"
+
+
+def test_extract_numeric_args():
+    parser = Gemma4ToolParser()
+    output = f"<|tool_call>call:set_temp{{value:72,unit:{Q}fahrenheit{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    args = json.loads(result.tool_calls[0]["arguments"])
+    assert args == {"value": 72, "unit": "fahrenheit"}
+
+
+def test_streaming_no_tool_call():
+    parser = Gemma4ToolParser()
+    result = parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text="Hello",
+        delta_text="Hello",
+    )
+    assert result == {"content": "Hello"}
+
+
+def test_streaming_tool_call_buffering():
+    parser = Gemma4ToolParser()
+    # Mid tool call — should buffer
+    result = parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=f"<|tool_call>call:get_weather{{location:{Q}SF",
+        delta_text=f"{Q}SF",
+    )
+    assert result is None  # buffering
+
+
+def test_streaming_tool_call_complete():
+    parser = Gemma4ToolParser()
+    full = f"<|tool_call>call:get_weather{{location:{Q}SF{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls_streaming(
+        previous_text=f"<|tool_call>call:get_weather{{location:{Q}SF{Q}}}",
+        current_text=full,
+        delta_text="<tool_call|>",
+    )
+    assert result is not None
+    assert "tool_calls" in result
+    assert len(result["tool_calls"]) == 1
+    assert result["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+def test_extract_hyphenated_tool_name():
+    parser = Gemma4ToolParser()
+    output = f"<|tool_call>call:web-search{{query:{Q}hello{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert result.tool_calls[0]["name"] == "web-search"
+    args = json.loads(result.tool_calls[0]["arguments"])
+    assert args == {"query": "hello"}
+
+
+def test_auto_parser_detects_gemma4():
+    from vllm_mlx.tool_parsers.auto_tool_parser import AutoToolParser
+
+    parser = AutoToolParser()
+    output = f"<|tool_call>call:search{{query:{Q}hello world{Q}}}<tool_call|>"
+    result = parser.extract_tool_calls(output)
+
+    assert result.tools_called is True
+    assert result.tool_calls[0]["name"] == "search"
+
+
+def test_auto_parser_streaming_gemma4():
+    from vllm_mlx.tool_parsers.auto_tool_parser import AutoToolParser
+
+    parser = AutoToolParser()
+    full = f"<|tool_call>call:get_weather{{location:{Q}SF{Q}}}<tool_call|>"
+
+    # No marker yet — pass through
+    r1 = parser.extract_tool_calls_streaming("", "Hello", "Hello")
+    assert r1 == {"content": "Hello"}
+
+    # Mid tool call — buffer
+    r2 = parser.extract_tool_calls_streaming(
+        "", f"<|tool_call>call:get_weather{{location:{Q}SF", f"{Q}SF"
+    )
+    assert r2 is None
+
+    # End marker arrives — parse
+    r3 = parser.extract_tool_calls_streaming(
+        f"<|tool_call>call:get_weather{{location:{Q}SF{Q}}}",
+        full,
+        "<tool_call|>",
+    )
+    assert r3 is not None
+    assert "tool_calls" in r3
+    assert r3["tool_calls"][0]["function"]["name"] == "get_weather"
+
+
+if __name__ == "__main__":
+    tests = [v for k, v in globals().items() if k.startswith("test_")]
+    for test in tests:
+        try:
+            test()
+            print(f"  PASS: {test.__name__}")
+        except Exception as e:
+            print(f"  FAIL: {test.__name__}: {e}")
diff --git a/vllm_mlx/cli.py b/vllm_mlx/cli.py
index 8a90bc9be..ca45f10f5 100644
--- a/vllm_mlx/cli.py
+++ b/vllm_mlx/cli.py
@@ -838,12 +838,13 @@ def main():
             "nemotron",
             "xlam",
             "functionary",
+            "gemma4",
             "glm47",
         ],
         help=(
             "Select the tool call parser for the model. Options: "
             "auto (auto-detect), mistral, qwen, qwen3_coder, llama, hermes, "
-            "deepseek, kimi, granite, nemotron, xlam, functionary, glm47. "
+            "deepseek, gemma4, kimi, granite, nemotron, xlam, functionary, glm47. "
             "Required for --enable-auto-tool-choice."
         ),
     )
diff --git a/vllm_mlx/tool_parsers/__init__.py b/vllm_mlx/tool_parsers/__init__.py
index 16f744080..b0232d30c 100644
--- a/vllm_mlx/tool_parsers/__init__.py
+++ b/vllm_mlx/tool_parsers/__init__.py
@@ -10,6 +10,7 @@
 - mistral: Mistral models ([TOOL_CALLS] format)
 - qwen/qwen3: Qwen models (<tool_call> and [Calling tool:] formats)
 - llama/llama3/llama4: Llama models (<function=name> format)
+- gemma4/gemma_4/gemma4_27b: Google Gemma 4 models (<|tool_call>call:name{...}<tool_call|> format)
 - hermes/nous: Hermes/NousResearch models
 - deepseek/deepseek_v3/deepseek_r1: DeepSeek models (unicode tokens)
 - kimi/kimi_k2/moonshot: Kimi/Moonshot models
@@ -48,6 +49,7 @@
 from .deepseek_tool_parser import DeepSeekToolParser
 from .functionary_tool_parser import FunctionaryToolParser
 from .granite_tool_parser import GraniteToolParser
+from .gemma4_tool_parser import Gemma4ToolParser
 from .hermes_tool_parser import HermesToolParser
 from .kimi_tool_parser import KimiToolParser
 from .llama_tool_parser import LlamaToolParser
@@ -65,6 +67,7 @@
     "ExtractedToolCallInformation",
     # Specific parsers
     "AutoToolParser",
+    "Gemma4ToolParser",
     "MistralToolParser",
     "QwenToolParser",
     "LlamaToolParser",
diff --git a/vllm_mlx/tool_parsers/auto_tool_parser.py b/vllm_mlx/tool_parsers/auto_tool_parser.py
index fc02d8fc6..dd1512326 100644
--- a/vllm_mlx/tool_parsers/auto_tool_parser.py
+++ b/vllm_mlx/tool_parsers/auto_tool_parser.py
@@ -16,6 +16,8 @@
     ToolParser,
     ToolParserManager,
 )
+from .gemma4_tool_parser import TOOL_CALL_PATTERN as GEMMA4_PATTERN
+from .gemma4_tool_parser import _gemma_args_to_json
 
 
 def generate_tool_id() -> str:
@@ -29,12 +31,13 @@ class AutoToolParser(ToolParser):
     Auto-detecting tool call parser.
 
     Tries multiple formats in order:
-    1. Mistral: [TOOL_CALLS] ...
-    2. Qwen bracket: [Calling tool: func_name({...})]
-    3. Qwen/Hermes XML: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
-    4. Llama: <function=name>{"arg": "value"}</function>
-    5. Nemotron: <tool_call><function=name>...</function></tool_call>
-    6. Raw JSON: {"name": "...", "arguments": {...}}
+    1. Gemma 4: <|tool_call>call:name{...}<tool_call|>
+    2. Mistral: [TOOL_CALLS] ...
+    3. Qwen bracket: [Calling tool: func_name({...})]
+    4. Nemotron: <tool_call><function=name>...</function></tool_call>
+    5. Qwen/Hermes XML: <tool_call>{"name": "...", "arguments": {...}}</tool_call>
+    6. Llama: <function=name>{"arg": "value"}</function>
+    7. Raw JSON: {"name": "...", "arguments": {...}}
 
     This is the default parser when no specific parser is selected.
     """
@@ -63,7 +66,41 @@ def extract_tool_calls(
         tool_calls: list[dict[str, Any]] = []
         cleaned_text = model_output
 
-        # 1. Try Mistral format
+        # 1. Try Gemma 4 format (most distinctive marker)
+        if "<|tool_call>" in model_output:
+            gemma_matches = GEMMA4_PATTERN.findall(model_output)
+            for name, raw_args in gemma_matches:
+                try:
+                    json_str = "{" + _gemma_args_to_json(raw_args) + "}"
+                    arguments = json.loads(json_str)
+                    tool_calls.append(
+                        {
+                            "id": generate_tool_id(),
+                            "name": name.strip(),
+                            "arguments": (
+                                json.dumps(arguments, ensure_ascii=False)
+                                if isinstance(arguments, dict)
+                                else str(arguments)
+                            ),
+                        }
+                    )
+                except (json.JSONDecodeError, ValueError):
+                    tool_calls.append(
+                        {
+                            "id": generate_tool_id(),
+                            "name": name.strip(),
+                            "arguments": raw_args,
+                        }
+                    )
+            if gemma_matches:
+                cleaned_text = GEMMA4_PATTERN.sub("", cleaned_text).strip()
+                return ExtractedToolCallInformation(
+                    tools_called=True,
+                    tool_calls=tool_calls,
+                    content=cleaned_text if cleaned_text else None,
+                )
+
+        # 2. Try Mistral format
         if self.MISTRAL_TOKEN in model_output:
             parts = model_output.split(self.MISTRAL_TOKEN)
             content = parts[0].strip()
@@ -113,7 +150,7 @@ def extract_tool_calls(
                     content=content if content else None,
                 )
 
-        # 2. Try Qwen bracket pattern
+        # 3. Try Qwen bracket pattern
         bracket_matches = self.QWEN_BRACKET_PATTERN.findall(model_output)
         for name, args_str in bracket_matches:
             try:
@@ -141,7 +178,7 @@ def extract_tool_calls(
         if bracket_matches:
             cleaned_text = self.QWEN_BRACKET_PATTERN.sub("", cleaned_text).strip()
 
-        # 3. Try Nemotron pattern (before Qwen XML as it's more specific)
+        # 4. Try Nemotron pattern (before Qwen XML as it's more specific)
         nemotron_matches = self.NEMOTRON_PATTERN.findall(cleaned_text)
         for name, params_block in nemotron_matches:
             params = self.NEMOTRON_PARAM_PATTERN.findall(params_block)
@@ -157,7 +194,7 @@ def extract_tool_calls(
         if nemotron_matches:
             cleaned_text = self.NEMOTRON_PATTERN.sub("", cleaned_text).strip()
 
-        # 4. Try Qwen/Hermes XML pattern
+        # 5. Try Qwen/Hermes XML pattern
         xml_matches = self.QWEN_XML_PATTERN.findall(cleaned_text)
         for match in xml_matches:
             try:
@@ -182,7 +219,7 @@ def extract_tool_calls(
         if xml_matches:
             cleaned_text = self.QWEN_XML_PATTERN.sub("", cleaned_text).strip()
 
-        # 5. Try Llama pattern
+        # 6. Try Llama pattern
         llama_matches = self.LLAMA_PATTERN.findall(cleaned_text)
         for name, args_str in llama_matches:
             try:
@@ -210,7 +247,7 @@ def extract_tool_calls(
         if llama_matches:
             cleaned_text = self.LLAMA_PATTERN.sub("", cleaned_text).strip()
 
-        # 6. Fallback: Try raw JSON
+        # 7. Fallback: Try raw JSON
         if not tool_calls:
             raw_calls = self._parse_raw_json_tool_calls(cleaned_text)
             if raw_calls:
@@ -327,6 +364,7 @@ def extract_tool_calls_streaming(
         """
         # Check for any tool call markers
         markers = [
+            "<|tool_call>",
             self.MISTRAL_TOKEN,
             "[Calling tool:",
             "<tool_call>",
@@ -339,7 +377,7 @@ def extract_tool_calls_streaming(
             return {"content": delta_text}
 
         # Check for completion markers
-        end_markers = ["</tool_call>", "</function>", ")]"]
+        end_markers = ["<tool_call|>", "</tool_call>", "</function>", ")]"]
         if any(m in delta_text for m in end_markers):
             result = self.extract_tool_calls(current_text)
             if result.tools_called:
diff --git a/vllm_mlx/tool_parsers/gemma4_tool_parser.py b/vllm_mlx/tool_parsers/gemma4_tool_parser.py
new file mode 100644
index 000000000..a3324ebd3
--- /dev/null
+++ b/vllm_mlx/tool_parsers/gemma4_tool_parser.py
@@ -0,0 +1,219 @@
+# SPDX-License-Identifier: Apache-2.0
+"""
+Gemma 4 tool call parser for vllm-mlx.
+
+Handles Gemma 4's tool calling format:
+- Tool call: <|tool_call>call:name{key:<|"|>value<|"|>,num:42}<tool_call|>
+
+Gemma 4 uses a custom non-JSON argument format:
+- Keys are unquoted
+- Strings are delimited by <|"|> (a special token) instead of "
+- Booleans are true/false, numbers are bare
+- Nested objects use {}, arrays use []
+"""
+
+import json
+import re
+import uuid
+from collections.abc import Sequence
+from typing import Any
+
+from .abstract_tool_parser import (
+    ExtractedToolCallInformation,
+    ToolParser,
+    ToolParserManager,
+)
+
+
+def generate_tool_id() -> str:
+    """Return a fresh tool call ID: "call_" plus eight random hex digits."""
+    return "call_" + uuid.uuid4().hex[:8]
+
+
+# Match: <|tool_call>call:NAME{ARGS}<tool_call|>
+# Group 1: function name (word characters and hyphens)
+# Group 2: raw arguments in Gemma's custom format, outer braces excluded
+TOOL_CALL_PATTERN = re.compile(
+    r"<\|tool_call>call:([\w-]+)\{(.*?)\}<tool_call\|>", re.DOTALL
+)
+
+
+def _gemma_args_to_json(raw: str) -> str:
+    """
+    Convert Gemma 4's custom argument format to JSON object members.
+
+    Gemma format: key:<|"|>value<|"|>,other_key:42,flag:true
+    Returned:     "key":"value","other_key":42,"flag":true
+
+    The result has no outer braces; callers wrap it in "{" ... "}" before
+    handing it to json.loads().
+
+    Uses a single left-to-right scan so that nested objects/arrays and
+    string values containing structural characters are handled correctly.
+
+    Args:
+        raw: Text captured between the outermost braces of a tool call.
+
+    Returns:
+        The input with keys quoted and <|"|>-delimited strings rewritten
+        as escaped JSON string literals. Bare values are copied verbatim,
+        so the result is valid JSON only if they already are.
+    """
+    # Scan the original text directly instead of first substituting a
+    # placeholder character for the delimiter, so that no character of the
+    # input can be mistaken for a string boundary.
+    quote = '<|"|>'
+    passthrough = "{}[],: \t\n\r"  # structural characters and whitespace
+    out: list[str] = []
+    pos = 0
+    end = len(raw)
+
+    while pos < end:
+        if raw.startswith(quote, pos):
+            # String value: runs to the next delimiter, or to the end of
+            # the input when the closing delimiter is missing.
+            start = pos + len(quote)
+            close = raw.find(quote, start)
+            if close == -1:
+                close = end
+            # json.dumps escapes quotes, backslashes and every control
+            # character (\r, \b, \f, ...), not just \n and \t; json.loads()
+            # rejects raw control characters inside a string.
+            out.append(json.dumps(raw[start:close], ensure_ascii=False))
+            pos = close + len(quote)
+            continue
+
+        # Structural characters and whitespace are already valid JSON.
+        ch = raw[pos]
+        if ch in passthrough:
+            out.append(ch)
+            pos += 1
+            continue
+
+        # Bare token: a key, number, boolean or null.
+        start = pos
+        while (
+            pos < end
+            and raw[pos] not in passthrough
+            and not raw.startswith(quote, pos)
+        ):
+            pos += 1
+        token = raw[start:pos]
+
+        # A token followed (after optional whitespace) by ':' is a key.
+        peek = pos
+        while peek < end and raw[peek] in " \t\n\r":
+            peek += 1
+        if peek < end and raw[peek] == ":":
+            out.append(json.dumps(token, ensure_ascii=False))
+        else:
+            out.append(token)
+
+    return "".join(out)
+
+
+@ToolParserManager.register_module(["gemma4", "gemma_4", "gemma4_27b"])
+class Gemma4ToolParser(ToolParser):
+    """
+    Tool call parser for Google Gemma 4 models.
+
+    Supports Gemma 4 tool call format:
+    - <|tool_call>call:name{key:<|"|>value<|"|>}<tool_call|>
+
+    Gemma 4's chat template uses <|tool> / <tool|> for definitions
+    and <|tool_call> / <tool_call|> for calls, with <|"|> as string
+    delimiters instead of standard JSON quotes.
+
+    Used when --enable-auto-tool-choice --tool-call-parser gemma4 are set.
+    """
+
+    SUPPORTS_NATIVE_TOOL_FORMAT = True
+
+    def extract_tool_calls(
+        self, model_output: str, request: dict[str, Any] | None = None
+    ) -> ExtractedToolCallInformation:
+        """Extract tool calls from a complete Gemma 4 model response.
+
+        Returns tools_called=True with one {"id", "name", "arguments"} dict
+        per matched call and the remaining text (None if empty) as content,
+        or tools_called=False with the unmodified model_output as content.
+        """
+        # Strip think tags if present (Gemma 4 uses <|channel>...<channel|>
+        # for thinking but may also emit standard <think> tags)
+        text = self.strip_think_tags(model_output)
+
+        matches = TOOL_CALL_PATTERN.findall(text)
+        if not matches:
+            return ExtractedToolCallInformation(
+                tools_called=False, tool_calls=[], content=model_output
+            )
+
+        tool_calls = []
+        for name, raw_args in matches:
+            try:
+                # Parsing "{...}" can only yield a dict, so re-serialize it
+                # unconditionally to get canonical JSON.
+                parsed = json.loads("{" + _gemma_args_to_json(raw_args) + "}")
+                arguments = json.dumps(parsed, ensure_ascii=False)
+            except (json.JSONDecodeError, ValueError):
+                # Best effort: fall back to the raw args string
+                arguments = raw_args
+            tool_calls.append(
+                {
+                    "id": generate_tool_id(),
+                    "name": name.strip(),
+                    "arguments": arguments,
+                }
+            )
+
+        # Remove tool call markup from content
+        content = TOOL_CALL_PATTERN.sub("", text).strip()
+        return ExtractedToolCallInformation(
+            tools_called=True,
+            tool_calls=tool_calls,
+            content=content or None,
+        )
+
+    def extract_tool_calls_streaming(
+        self,
+        previous_text: str,
+        current_text: str,
+        delta_text: str,
+        previous_token_ids: Sequence[int] | None = None,
+        current_token_ids: Sequence[int] | None = None,
+        delta_token_ids: Sequence[int] | None = None,
+        request: dict[str, Any] | None = None,
+    ) -> dict[str, Any] | None:
+        """Extract tool calls from streaming Gemma 4 model output.
+
+        Returns {"content": delta_text} before any tool call starts, a
+        {"tool_calls": [...]} delta when this chunk completes one or more
+        calls, and None while a call is still being buffered.
+        """
+        # No tool call started yet — pass through as content
+        if "<|tool_call>" not in current_text:
+            return {"content": delta_text}
+
+        # Inside a tool call — buffer (return None to suppress partial output)
+        if "<tool_call|>" not in delta_text:
+            return None
+
+        # End marker arrived. Emit only the calls this delta completed: those
+        # already complete in previous_text were reported by an earlier chunk,
+        # and repeating them would duplicate them for the client.
+        calls = self.extract_tool_calls(current_text).tool_calls
+        sent = len(self.extract_tool_calls(previous_text).tool_calls)
+        if len(calls) <= sent:
+            return None
+        return {
+            "tool_calls": [
+                {
+                    "index": i,
+                    "id": tc["id"],
+                    "type": "function",
+                    "function": {"name": tc["name"], "arguments": tc["arguments"]},
+                }
+                for i, tc in enumerate(calls)
+                if i >= sent
+            ]
+        }

From 24114c327d0921bdea84ccf80c7eaaa01ab06f2b Mon Sep 17 00:00:00 2001
From: Keegan Mullaney <keeganmullaney@gmail.com>
Date: Sun, 5 Apr 2026 18:27:57 -0700
Subject: [PATCH 2/2] style: remove extraneous f-string prefixes to fix ruff
 F541

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/test_gemma4_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_gemma4_parser.py b/tests/test_gemma4_parser.py
index acf499be2..840da3ea1 100644
--- a/tests/test_gemma4_parser.py
+++ b/tests/test_gemma4_parser.py
@@ -20,14 +20,14 @@ def test_args_to_json_simple_strings():
 
 
 def test_args_to_json_numbers():
-    raw = f"temperature:15,humidity:0.75"
+    raw = "temperature:15,humidity:0.75"
     result = "{" + _gemma_args_to_json(raw) + "}"
     parsed = json.loads(result)
     assert parsed == {"temperature": 15, "humidity": 0.75}
 
 
 def test_args_to_json_booleans():
-    raw = f"enabled:true,verbose:false"
+    raw = "enabled:true,verbose:false"
     result = "{" + _gemma_args_to_json(raw) + "}"
     parsed = json.loads(result)
     assert parsed == {"enabled": True, "verbose": False}