google
diff --git a/‎src/google/adk/tools/mcp_tool/_internal.py‎
Lines changed: 318 additions & 0 deletions b/‎src/google/adk/tools/mcp_tool/_internal.py‎
Lines changed: 318 additions & 0 deletions
@@ -0,0 +1,318 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Internal utilities for MCP tools.
+
+This module contains internal validation and sanitization utilities
+that are not part of the public API and follow RFC 7230 properly.
+
+**Security Notes:**
+
+- Header validation implements RFC 7230 §3.2 for proper HTTP header format
+- Only truly dangerous control characters are removed from header values
+- Line folding (obs-fold) is deprecated by RFC 7230 §3.2.4; senders must not
+  generate folded (multi-line) header values
+- Binary data handling is separate from text data for security
+- All functions log security-relevant warnings when appropriate
+
+**RFC 7230 Compliance:**
+
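+- Header names: only letters, digits, and hyphens are accepted (a stricter
+  subset of the RFC 7230 ``token`` grammar)
+- Header values: control characters (0x00-0x1F, 0x7F) other than horizontal
+  tab are not valid field content
+- Header folding: obs-fold (CRLF followed by SP or HTAB) is obsolete and must
+  not be generated by senders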
+- Binary data: handled separately with explicit allow_binary flag
+
+**Attack Prevention:**
+
+- HTTP header injection attacks via control character filtering
+- Response splitting attacks through CRLF handling
+- Log injection attacks via character sanitization
+- Type confusion attacks through strict validation
+"""
+
+import logging
+import re
+from typing import Any
+
+logger = logging.getLogger("google_adk." + __name__)
+
+# Characters that may not appear in a header name: control characters, the
+# HTTP separator characters, space and horizontal tab. The escaping suggests
+# it is meant to be placed inside a regex character class; it is not
+# referenced by the validation functions in this module.
+_HEADER_NAME_FORBIDDEN = r'\x00-\x1F\x7F()<>@,;:\\"/[\]?={} \t'
+
+# Line-terminator characters (CR and LF), used by `_is_whitespace`.
+# NOTE: these are not the optional whitespace of RFC 7230 (SP / HTAB).
+_HEADER_WHITESPACE = "\r\n"
+
+# Accepted header names: one or more ASCII letters, digits or hyphens.
+# The pattern is anchored with \A and \Z rather than ^ and $: "$" also
+# matches just before a trailing newline, so "X-Foo\n" would be accepted.
+_HEADER_NAME_PATTERN = re.compile(r"\A[a-zA-Z0-9-]+\Z")
+
+# Control characters that should never appear in header values: every C0
+# control (0x00-0x1F) except HTAB (0x09), LF (0x0A) and CR (0x0D), plus DEL
+# (0x7F). CR and LF are intentionally not members of this set; code that must
+# block line breaks has to test for them separately.
+_DANGEROUS_CHARS = {
+    chr(code) for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
+} | {"\x7f"}
+
+
+def _is_printable_ascii(char: str) -> bool:
+  """Return True if ``char`` is a single printable ASCII character.
+
+  Printable ASCII is the code point range 0x20 (space) to 0x7E (tilde),
+  inclusive.
+
+  Args:
+      char: The candidate character.
+
+  Returns:
+      True for a one-character string in the printable range; False for any
+      other character and for input that is not exactly one character.
+  """
+  try:
+    return 0x20 <= ord(char) <= 0x7E
+  except TypeError:
+    # ord() raises TypeError (never ValueError) when its argument is not a
+    # single character, e.g. "" or "ab" or a non-string object.
+    return False
+
+
+def _is_control_char(char: str) -> bool:
+  """Return True if ``char`` is a member of ``_DANGEROUS_CHARS``."""
+  is_dangerous = char in _DANGEROUS_CHARS
+  return is_dangerous
+
+
+def _is_whitespace(char: str) -> bool:
+  """Return True if ``char`` occurs within ``_HEADER_WHITESPACE``."""
+  # str.find() uses the same substring semantics as the ``in`` operator.
+  return _HEADER_WHITESPACE.find(char) != -1
+
+
+def _get_forbidden_char_desc(char: str) -> str:
+  """Return a short human-readable description of a forbidden character.
+
+  Args:
+      char: A single character.
+
+  Returns:
+      A fixed name for CR, LF and HTAB; otherwise a label (control, printable
+      or non-ASCII) followed by the ``repr`` of the character, so that raw
+      control characters never end up in messages or logs.
+  """
+  named = {
+      "\r": "carriage return",
+      "\n": "line feed",
+      "\t": "horizontal tab",
+  }
+  if char in named:
+    return named[char]
+
+  code = ord(char)
+  if code < 0x20 or code == 0x7F:
+    return f"control character: {repr(char)}"
+  if code <= 0x7E:
+    # Previously printable characters were mislabelled as "non-printable".
+    return f"printable character: {repr(char)}"
+  return f"non-ASCII character: {repr(char)}"
+
+
+def _validate_header_name(header_name: str) -> None:
+  """Validate that a header name uses only the accepted characters.
+
+  Accepted names are those matched in full by ``_HEADER_NAME_PATTERN``
+  (ASCII letters, digits and hyphens), which is stricter than the RFC 7230
+  ``token`` grammar.
+
+  Args:
+      header_name: The HTTP header name to check.
+
+  Raises:
+      ValueError: If the name is empty or contains any other character.
+  """
+  if not header_name:
+    raise ValueError("Header name cannot be empty.")
+
+  # fullmatch() requires the pattern to consume the entire string. With
+  # match(), a pattern ending in "$" also succeeds just before a trailing
+  # newline, which would let a name such as "X-Foo\n" through.
+  if not _HEADER_NAME_PATTERN.fullmatch(header_name):
+    raise ValueError(
+        f'Header name "{header_name}" contains invalid characters. '
+        "Header names must conform to RFC 7230 and cannot contain "
+        'control characters, spaces, or separators like ():<>@,;:\\"/[]?={}.'
+    )
+
+
+def _sanitize_header_value(value: str) -> str:
+  """Strip characters that could enable header injection from a value.
+
+  Removes every character in ``_DANGEROUS_CHARS`` and, additionally, CR and
+  LF. RFC 7230 §3.2.4 forbids senders from generating folded (multi-line)
+  field values, and an embedded line break would let a value end the header
+  line and start a new one.
+
+  Args:
+      value: The header value to sanitize. Non-string input is converted
+        with ``str()`` first.
+
+  Returns:
+      The value with the offending characters removed. All other characters,
+      including spaces and horizontal tabs, are kept in their original order.
+  """
+  if not isinstance(value, str):
+    value = str(value)
+
+  kept_chars = []
+  removed_chars = set()
+  for char in value:
+    # CR and LF are tested explicitly so line breaks are always stripped.
+    if char in _DANGEROUS_CHARS or char in ("\r", "\n"):
+      removed_chars.add(char)
+    else:
+      kept_chars.append(char)
+
+  if removed_chars:
+    # One warning per value rather than one per character, so hostile input
+    # cannot flood the log. Formatting the list applies repr() to each
+    # element, which keeps raw control characters out of the log output.
+    logger.warning(
+        "Removed dangerous character(s) %s from header value "
+        "for security reasons",
+        sorted(removed_chars),
+    )
+
+  return "".join(kept_chars)
+
+
+def _validate_header_value(value: Any, allow_binary: bool = False) -> None:
+  """Reject header values that contain characters usable for injection.
+
+  A value is rejected when it contains any character in ``_DANGEROUS_CHARS``
+  or a CR / LF. Line breaks are refused because RFC 7230 §3.2.4 forbids
+  senders from generating folded field values and because they allow a value
+  to terminate the header line.
+
+  Args:
+      value: The header value to validate. ``None`` is accepted as-is,
+        ``bytes`` are checked byte by byte, strings are checked directly and
+        any other type is checked through its ``str()`` form.
+      allow_binary: Whether ``bytes`` values are permitted at all.
+
+  Raises:
+      ValueError: If ``value`` is ``bytes`` while ``allow_binary`` is False,
+        or if a forbidden character is found.
+  """
+  if value is None:
+    return
+
+  if isinstance(value, bytes):
+    if not allow_binary:
+      raise ValueError("Binary data not allowed in HTTP header values")
+    # latin-1 maps each byte to the code point with the same number, so the
+    # bytes can be scanned with the same character test as text.
+    text = value.decode("latin-1")
+    problem = "Binary data contains dangerous byte"
+  elif isinstance(value, str):
+    text = value
+    problem = "Header value contains dangerous character"
+  else:
+    text = str(value)
+    problem = (
+        "Header value (converted to string) contains dangerous character"
+    )
+
+  for char in text:
+    # CR and LF are tested explicitly so line breaks are always rejected.
+    if char in _DANGEROUS_CHARS or char in ("\r", "\n"):
+      raise ValueError(
+          f"{problem}: {repr(char)} ({_get_forbidden_char_desc(char)})"
+      )
+
+
+def sanitize_header_value(value: Any) -> str:
+  """Return a sanitized string form of ``value`` for use as a header value.
+
+  Non-string input is first converted with ``str()``; the result is then
+  passed through ``_sanitize_header_value``.
+
+  Args:
+      value: The header value to sanitize (any type).
+
+  Returns:
+      The sanitized header value as a string.
+  """
+  text = value if isinstance(value, str) else str(value)
+  return _sanitize_header_value(text)
+
+
+def validate_header_value(
+    state_key: str, value: Any, strict: bool = False
+) -> None:
+  """Check that a state value is suitable for use in a header.
+
+  Values that are not ``str``, ``int``, ``float`` or ``bool`` trigger a
+  warning, or an error in strict mode. Every value is then passed to
+  ``_validate_header_value`` regardless of ``strict``.
+
+  Args:
+      state_key: The state key the value was read from (used in messages).
+      value: The value to validate.
+      strict: If True, raise instead of logging for non-primitive types.
+
+  Raises:
+      ValueError: If ``strict`` is True and ``value`` is not a primitive
+        type, or if ``_validate_header_value`` rejects the value.
+  """
+  primitive_types = (str, int, float, bool)
+  if not isinstance(value, primitive_types):
+    type_name = type(value).__name__
+    warning_text = (
+        f'Value for state key "{state_key}" is of type {type_name}, which '
+        "may not serialize correctly into a header. Consider "
+        "pre-serializing complex values or using state_header_format."
+    )
+    if strict:
+      raise ValueError(warning_text)
+    logger.warning(warning_text)
+
+  # The character check runs in both strict and non-strict mode.
+  _validate_header_value(value)
+
+
+def create_session_state_header_provider(
+    state_key: str,
+    header_name: str = "Authorization",
+    header_format: str = "Bearer {value}",
+    default_value: str = None,
+    strict: bool = False,
+):
+  """Build a header provider that reads one value from ``ctx.state``.
+
+  The returned callable looks up ``state_key`` in the state of the context
+  it is given, validates the value, formats it with ``header_format`` and
+  sanitizes the result. It is intended to be used as a ``header_provider``
+  for McpToolset.
+
+  .. warning::
+      **Security Best Practice**: For sensitive, short-lived tokens like
+      JWTs, use ``request_state`` instead of ``session.state`` to avoid
+      persisting sensitive data to the database. Pass tokens via
+      ``RunAgentRequest.request_state``, which will override
+      ``session.state`` for the duration of the request without being
+      persisted.
+
+  Args:
+      state_key: The key to look up in session.state (or request_state).
+      header_name: The HTTP header name to set (default: 'Authorization').
+        It is validated once, when this function is called.
+      header_format: Format string for the header value. Use {value} as a
+        placeholder for the state value (default: 'Bearer {value}').
+      default_value: Value used when ``state_key`` is missing from the state.
+        If None, the header is omitted when the key is missing.
+      strict: If True, raise ValueError when a non-primitive value is
+        encountered. If False (default), log a warning instead.
+
+  Returns:
+      A callable that takes a context object and returns a dictionary of
+      headers: empty when the value is None or an empty string, otherwise a
+      single ``header_name`` entry.
+
+  Raises:
+      ValueError: If ``header_name`` fails validation.
+  """
+  # Fail fast on a bad header name instead of at request time.
+  _validate_header_name(header_name)
+
+  def provider(ctx) -> dict[str, str]:
+    headers: dict[str, str] = {}
+    state_value = ctx.state.get(state_key, default_value)
+
+    # A missing or empty value means the header is left out entirely.
+    if state_value is None or state_value == "":
+      return headers
+
+    validate_header_value(state_key, state_value, strict=strict)
+    headers[header_name] = sanitize_header_value(
+        header_format.format(value=state_value)
+    )
+    return headers
+
+  return provider