Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,49 @@ All notable changes to this project will be documented here.
and the queue-time parser rejects the empty form on `workflow_dispatch`.
The `workflow_run` path tolerated it, so the bug was latent here but
blocked manual dispatch and fresh-repo cascades. Closes #91.
- Windows clipboard `list_formats` now deduplicates MIME types when multiple
native format names map to the same MIME (e.g. `Text` and `UnicodeText`
both map to `text/plain`), matching the existing macOS behavior. Prevents
inflated format counts and downstream duplicate iteration. (#101)
- Nested HTML tables no longer leak inner-table cell text into the outer
cell. `_TableExtractor.handle_data` now gates on `_table_depth == 1` so
text inside an inner `<table><tr><td>...</td></tr></table>` is no longer
concatenated into the surrounding outer cell. (#101)
- macOS `_macos_write_typed` no longer trips AppleScript's 32,767-character
per-line limit when writing HTML or RTF content larger than ~24 KB. The
base64-encoded payload is split across multiple `set b64 to b64 & "..."`
statements with a 4,000-character chunk size. (#101)
- `_run_subprocess` and `_run_with_stdin` no longer orphan their child
process when the calling task is cancelled (e.g. on MCP client
disconnect). A `finally` block now calls `proc.kill()` on any non-normal
exit including `asyncio.CancelledError`, which inherits from
`BaseException` and previously bypassed the timeout-only `except`
handler. (#101)
- `parse_tsv` no longer treats single-cell input with a stray tab
(`"word\t"`, commonly produced when copying one Excel cell on Windows)
as a 1x2 table with a phantom empty column. Single-row results now
require at least two non-empty cells. (#101)

### Security
- New `MCP_CLIPBOARD_MAX_IMAGE_BYTES` cap (default 10 MB) on
`read_clipboard_image`. A 100 MB clipboard bitmap previously became
~133 MB base64 in a single MCP response and could time out or drop
the MCP transport. Oversized reads now raise the new
`ClipboardSizeError`, and `clipboard_paste` returns an explanatory
message instead of forwarding the payload. The cap is a wire-level
guard rather than a memory-bounded read: the backend still buffers
the full image before the size check, so the inflated wire response
is prevented but local memory pressure on the host running the
server is not. (#101)
- Image subtype passed to `mcp.Image(format=...)` is now validated against
an allowlist (`png`, `jpeg`, `gif`, `webp`, `tiff`, `bmp`). Clipboard-
controlled MIME strings with parameter injection or unexpected subtypes
fall back to `png` rather than flowing through to the host. (#101)
- Markdown code fences in `clipboard_paste` (JSON, code, RTF branches) now
size dynamically to one longer than the longest backtick run inside the
wrapped content. Prevents clipboard text containing literal triple
backticks from closing the fence early and rendering injected content
as Markdown (or HTML on permissive hosts). (#101)

## [2.2.1] - 2026-04-16

Expand Down
161 changes: 115 additions & 46 deletions src/mcp_clipboard/clipboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,23 @@ class ClipboardError(Exception):
"""Raised when clipboard access fails."""


class ClipboardSizeError(ClipboardError):
    """Raised when clipboard content exceeds a configured size cap.

    Subclasses :class:`ClipboardError`, so handlers that catch the base
    class keep working; catch this type first when an oversized payload
    needs its own message.
    """


# AppleScript source has a 32,767-character per-line limit. Chunk size for
# base64 string literals must stay well under that to leave headroom for
# the surrounding `set b64 to b64 & "..."` statement syntax.
_APPLESCRIPT_CHUNK = 4000

def _int_from_env(name: str, default: int) -> int:
    """Return the non-negative integer stored in environment variable *name*.

    An unset or blank variable yields *default*. A value that is not a
    base-10 integer, or that is negative, also yields *default* but emits a
    ``RuntimeWarning`` first, so a typo in the environment can neither crash
    the module at import time nor produce a cap that rejects every read.
    """
    raw = os.environ.get(name)
    if raw is None or not raw.strip():
        return default
    try:
        value = int(raw)
    except ValueError:
        value = -1  # funnel into the shared "invalid" branch below
    if value < 0:
        import warnings

        warnings.warn(
            f"Ignoring invalid {name}={raw!r}; using default {default}",
            RuntimeWarning,
            stacklevel=2,
        )
        return default
    return value


# Cap on image read size. A large clipboard bitmap (e.g. 100 MB uncompressed
# TIFF screenshot) becomes ~133 MB base64 in a single MCP response and can
# time out or drop the MCP transport. The cap is a wire-level guard;
# backend memory is not bounded (the full image is still buffered before
# the size check fires). Configurable via env var; malformed or negative
# values fall back to the 10 MB default instead of failing at import.
_MAX_IMAGE_BYTES = _int_from_env("MCP_CLIPBOARD_MAX_IMAGE_BYTES", 10 * 1024 * 1024)


def base_mime_type(mime: str) -> str:
"""Strip parameters from a MIME type string.

Expand All @@ -50,29 +67,40 @@ async def _run_subprocess(
available"). Set it to ``False`` for macOS and Windows backends where exit
code 1 indicates a real error (script failure, permission denied, etc.).
"""
proc: asyncio.subprocess.Process | None = None
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
except FileNotFoundError as fnf:
raise ClipboardError(f"Command not found: {cmd[0]}") from fnf
except TimeoutError as te:
proc.kill()
with contextlib.suppress(TimeoutError):
await asyncio.wait_for(proc.wait(), timeout=1.0)
raise ClipboardError(f"Clipboard command timed out: {' '.join(cmd)}") from te

if proc.returncode != 0:
if allow_empty_exit and proc.returncode == 1:
return b""
err = stderr.decode(errors="replace").strip()
raise ClipboardError(f"Clipboard command failed (rc={proc.returncode}): {err}")

return stdout
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=env,
)
except FileNotFoundError as fnf:
raise ClipboardError(f"Command not found: {cmd[0]}") from fnf
try:
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
except TimeoutError as te:
proc.kill()
with contextlib.suppress(TimeoutError):
await asyncio.wait_for(proc.wait(), timeout=1.0)
raise ClipboardError(f"Clipboard command timed out: {' '.join(cmd)}") from te

if proc.returncode != 0:
if allow_empty_exit and proc.returncode == 1:
return b""
err = stderr.decode(errors="replace").strip()
raise ClipboardError(f"Clipboard command failed (rc={proc.returncode}): {err}")

return stdout
finally:
# Belt-and-suspenders cleanup for paths that bypass the explicit
# kill above -- specifically asyncio.CancelledError (BaseException)
# from a cancelled MCP request, which would otherwise orphan the
# subprocess. kill() is a no-op once the process has exited.
if proc is not None and proc.returncode is None:
with contextlib.suppress(ProcessLookupError):
proc.kill()


async def _run(
Expand Down Expand Up @@ -116,28 +144,39 @@ async def _run_with_stdin(
"""
debug = os.environ.get("MCP_CLIPBOARD_DEBUG", "") == "1"
stderr_mode = asyncio.subprocess.PIPE if debug else asyncio.subprocess.DEVNULL
proc: asyncio.subprocess.Process | None = None
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.DEVNULL,
stderr=stderr_mode,
env=env,
)
_, stderr_data = await asyncio.wait_for(proc.communicate(input=input_data), timeout=timeout)
except FileNotFoundError as fnf:
raise ClipboardError(f"Command not found: {cmd[0]}") from fnf
except TimeoutError as te:
proc.kill()
with contextlib.suppress(TimeoutError):
await asyncio.wait_for(proc.wait(), timeout=1.0)
raise ClipboardError(f"Clipboard command timed out: {' '.join(cmd)}") from te

if proc.returncode != 0:
msg = f"Clipboard write failed (rc={proc.returncode}): {cmd[0]}"
if debug and stderr_data:
msg += f"\nstderr: {stderr_data.decode(errors='replace').strip()}"
raise ClipboardError(msg)
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.DEVNULL,
stderr=stderr_mode,
env=env,
)
except FileNotFoundError as fnf:
raise ClipboardError(f"Command not found: {cmd[0]}") from fnf
try:
_, stderr_data = await asyncio.wait_for(
proc.communicate(input=input_data), timeout=timeout
)
except TimeoutError as te:
proc.kill()
with contextlib.suppress(TimeoutError):
await asyncio.wait_for(proc.wait(), timeout=1.0)
raise ClipboardError(f"Clipboard command timed out: {' '.join(cmd)}") from te

if proc.returncode != 0:
msg = f"Clipboard write failed (rc={proc.returncode}): {cmd[0]}"
if debug and stderr_data:
msg += f"\nstderr: {stderr_data.decode(errors='replace').strip()}"
raise ClipboardError(msg)
finally:
# See _run_subprocess: belt-and-suspenders cleanup for the
# CancelledError path which bypasses except handlers entirely.
if proc is not None and proc.returncode is None:
with contextlib.suppress(ProcessLookupError):
proc.kill()


def _find_wayland_display() -> str | None:
Expand Down Expand Up @@ -390,7 +429,16 @@ async def _windows_list_formats() -> list[str]:
)
raw = await _run(["powershell", "-NoProfile", "-Command", script], allow_empty_exit=False)
native = [line.strip() for line in raw.splitlines() if line.strip()]
return [_WIN_TO_MIME.get(f, f) for f in native]
# Deduplicate: Windows clipboards routinely expose both "Text" and
# "UnicodeText" which both map to text/plain. Mirror _macos_list_formats.
seen: set[str] = set()
result: list[str] = []
for f in native:
mime = _WIN_TO_MIME.get(f, f)
if mime not in seen:
seen.add(mime)
result.append(mime)
return result


# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -513,11 +561,23 @@ async def _macos_write_typed(content: str, mime_type: str) -> None:
if mime_type in ("text/html", "text/rtf"):
uti = "public.html" if mime_type == "text/html" else "public.rtf"
b64 = base64.b64encode(content.encode("utf-8")).decode("ascii")
# AppleScript has a 32,767-char per-line limit, so a single
# `set b64 to "..."` literal breaks for content >~24 KB once
# base64-encoded. Split the literal across multiple statements.
b64_chunks = [
b64[i : i + _APPLESCRIPT_CHUNK] for i in range(0, len(b64), _APPLESCRIPT_CHUNK)
]
if not b64_chunks:
b64_chunks = [""]
b64_lines = [f'set b64 to "{b64_chunks[0]}"']
for chunk in b64_chunks[1:]:
b64_lines.append(f'set b64 to b64 & "{chunk}"')
script = (
'use framework "AppKit"\n'
'use framework "Foundation"\n'
f'set b64 to "{b64}"\n'
"set decoded to (current application's NSData's alloc()'s "
+ "\n".join(b64_lines)
+ "\n"
+ "set decoded to (current application's NSData's alloc()'s "
"initWithBase64EncodedString:b64 options:0)\n"
"set pb to current application's NSPasteboard's generalPasteboard()\n"
"pb's clearContents()\n"
Expand Down Expand Up @@ -721,6 +781,9 @@ async def read_clipboard_image(mime_type: str = "image/png") -> bytes:

Like :func:`read_clipboard`, falls back to a matching suffixed MIME
type when the exact requested type is not available.

Raises :exc:`ClipboardSizeError` when the image exceeds
``MCP_CLIPBOARD_MAX_IMAGE_BYTES`` (default 10 MB).
"""
backend = _get_backend()
result = await _IMAGE_READERS[backend](mime_type)
Expand All @@ -734,6 +797,12 @@ async def read_clipboard_image(mime_type: str = "image/png") -> bytes:
if result:
break

if len(result) > _MAX_IMAGE_BYTES:
raise ClipboardSizeError(
f"Image exceeds clipboard read limit "
f"({len(result):,} bytes, max {_MAX_IMAGE_BYTES:,}). "
f"Set MCP_CLIPBOARD_MAX_IMAGE_BYTES to increase."
)
return result


Expand Down
13 changes: 12 additions & 1 deletion src/mcp_clipboard/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,10 @@ def handle_endtag(self, tag: str) -> None:
self._current_row = None

def handle_data(self, data: str) -> None:
    """Append ``data`` to the open cell, but only at the outer table's depth."""
    if self._current_cell is None:
        # No cell is open, so there is nowhere to put the text.
        return
    if self._table_depth != 1:
        # Inside a nested table the outer cell is still open while the inner
        # <td>/<th> tags are skipped; collecting here would leak inner-cell
        # text into the outer cell.
        return
    self._current_cell.append(data)


Expand Down Expand Up @@ -90,6 +93,14 @@ def parse_tsv(text: str) -> list[list[str]]:
for row in reader:
if any(cell.strip() for cell in row):
rows.append(row)

# Reject a single row with fewer than two non-empty cells: copying a
# single Excel cell on Windows commonly produces "word\t" which would
# otherwise be rendered as a misleading 1x2 table with a phantom
# empty column.
if len(rows) == 1 and sum(1 for c in rows[0] if c.strip()) < 2:
return []

return rows


Expand Down
42 changes: 38 additions & 4 deletions src/mcp_clipboard/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from .clipboard import (
ClipboardError,
ClipboardSizeError,
base_mime_type,
list_clipboard_formats,
read_clipboard,
Expand Down Expand Up @@ -102,13 +103,38 @@ def _load_icons() -> list[Icon]:
_BINARY_MIME_PREFIXES = ("image/", "audio/", "video/")
_BINARY_MIME_EXACT = frozenset({"application/octet-stream"})

# Allowlist of image subtypes recognized as a safe `format=` value to pass
# to mcp.Image. Anything else (including parameter-laden or malformed
# clipboard-controlled MIME strings) falls back to "png" so the host
# never sees an unexpected format string.
_IMAGE_SUBTYPE_ALLOWLIST = frozenset({"png", "jpeg", "gif", "webp", "tiff", "bmp"})

# image/* entries that are text-readable (not actual binary).
_TEXT_READABLE_MIMES = frozenset({"image/svg+xml"})

# MIME type validation: type and subtype must start with a letter.
_MIME_RE = re.compile(r"^[a-zA-Z][\w.+\-]*/[a-zA-Z][\w.+\-]*(;\s*[\w.+\-]+=[\w.+\-]+)*$")


def _safe_code_fence(text: str) -> str:
    """Build a backtick fence that ``text`` cannot terminate early.

    A fenced code block is only closed by a run of backticks at least as
    long as the one that opened it, so the fence is made one backtick longer
    than the longest run found in ``text`` and never shorter than three.
    """
    # Each match is one maximal run of consecutive backticks.
    backtick_runs = re.findall(r"`+", text)
    widest = max(map(len, backtick_runs), default=0)
    return "`" * max(3, widest + 1)


async def _read_clipboard_content() -> tuple[list[list[str]], str, str]:
"""Read clipboard and attempt to extract tabular data.

Expand Down Expand Up @@ -158,13 +184,16 @@ def _format_non_tabular(text: str) -> str:
try:
parsed = json.loads(text.strip())
formatted = json.dumps(parsed, indent=2, ensure_ascii=False)
result = f"Clipboard contains JSON:\n\n```json\n{formatted}\n```"
fence = _safe_code_fence(formatted)
result = f"Clipboard contains JSON:\n\n{fence}json\n{formatted}\n{fence}"
except (json.JSONDecodeError, ValueError):
result = f"Clipboard content:\n\n{text}"
elif content_type == "url":
result = f"Clipboard contains URL:\n\n{text.strip()}"
elif content_type == "code":
result = f"Clipboard contains code:\n\n```\n{text.rstrip()}\n```"
body = text.rstrip()
fence = _safe_code_fence(body)
result = f"Clipboard contains code:\n\n{fence}\n{body}\n{fence}"
else:
result = f"Clipboard content:\n\n{text}"

Expand Down Expand Up @@ -246,7 +275,8 @@ async def clipboard_paste(
if rtf.strip():
truncated = len(rtf) > _MAX_CONTENT_CHARS
display = rtf[:_MAX_CONTENT_CHARS]
result = f"Clipboard contains rich text (RTF):\n\n```\n{display}\n```"
fence = _safe_code_fence(display)
result = f"Clipboard contains rich text (RTF):\n\n{fence}\n{display}\n{fence}"
if truncated:
result += f"\n\n... [truncated at {_MAX_CONTENT_CHARS:,} characters]"
return result
Expand All @@ -267,8 +297,12 @@ async def clipboard_paste(
try:
data = await read_clipboard_image(mime)
if data:
fmt = base_mime_type(mime).split("/", 1)[1]
fmt = base_mime_type(mime).split("/", 1)[1].lower()
if fmt not in _IMAGE_SUBTYPE_ALLOWLIST:
fmt = "png"
return Image(data=data, format=fmt)
except ClipboardSizeError as e:
return f"Clipboard image too large to return: {e}"
except ClipboardError as e:
logger.debug("Image read failed: %s", e)
# Non-image binary (audio/video) — report but can't return
Expand Down
Loading