diff --git a/CHANGELOG.md b/CHANGELOG.md index bb5d15e..2b34430 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,11 @@ All notable changes to this project will be documented here. titled `Conduct` as a workaround for GitHub's lack of a general private-contact channel. Closes #25. +### Fixed +- Pipe (`|`) and backslash (`\`) in table cell values are now escaped in the + `markdown` and `notion` output formats, preventing column-structure corruption + in rendered tables. Closes #16. + ### Changed - Renamed `.github/workflows/test.yml` → `ci.yml` and the workflow `name:` from `Tests` to `CI` for cross-repo consistency. README diff --git a/docs/superpowers/specs/2026-04-11-table-formatter-escaping-design.md b/docs/superpowers/specs/2026-04-11-table-formatter-escaping-design.md new file mode 100644 index 0000000..5c31802 --- /dev/null +++ b/docs/superpowers/specs/2026-04-11-table-formatter-escaping-design.md @@ -0,0 +1,84 @@ +# Table Formatter Escaping -- Design Spec + +**Date:** 2026-04-11 +**Issues:** #16, #17, #18 (skipping #15 -- potential contributor) +**Scope:** Fix pipe-character escaping in Markdown/Notion and Jira/Confluence formatters; add escaping test matrix for all output formats. + +## Problem + +`_format_markdown` and `_format_jira` in `parser.py` interpolate raw cell values into pipe-delimited table syntax. A cell containing `|` breaks the table structure in every downstream renderer. Backslash `\` also needs escaping since it is the escape character in both formats. + +Zero tests cover special characters in any formatter. + +## Approach + +Per-formatter escape helpers (Approach A). Each formatter gets its own escape function. The logic is identical today (`\` -> `\\`, then `|` -> `\|`), but separate helpers keep each formatter self-contained and extensible for format-specific concerns later. + +## PR 1: Fix #16 -- Markdown/Notion pipe escaping + +### Changes in `src/mcp_clipboard/parser.py` + +Add helper: + +```python +def _escape_md_cell(cell: str) -> str: + """Escape characters that break GFM pipe tables.""" + return cell.replace("\\", "\\\\").replace("|", "\\|") +``` + +Modify `_format_markdown()`: +- Apply `_escape_md_cell()` to every cell value in header and data rows +- Compute column widths from the *escaped* values so alignment is correct +- Apply escaping *before* `ljust()` padding + +Notion routes through `_format_markdown` -- no additional changes needed. + +### Tests + +Add tests in `tests/test_parser.py` (alongside existing formatter tests): +- Cell containing `|` -- assert column count preserved, pipe escaped in output +- Cell containing `||` -- same +- Cell containing `\` -- assert escaped to `\\` +- Cell containing `\|` -- assert escaped to `\\|` (no ambiguity) +- Leading/trailing `|` in a cell + +## PR 2: Fix #17 -- Jira/Confluence pipe escaping + +### Changes in `src/mcp_clipboard/parser.py` + +Add helper: + +```python +def _escape_jira_cell(cell: str) -> str: + """Escape characters that break Jira/Confluence wiki markup tables.""" + return cell.replace("\\", "\\\\").replace("|", "\\|") +``` + +Modify `_format_jira()`: +- Apply `_escape_jira_cell()` to every cell value in header and data rows + +Confluence routes through `_format_jira` -- no additional changes needed. + +### Tests + +Add tests in `tests/test_parser.py`: +- Cell containing `|` -- assert column count preserved, pipe escaped +- Cell containing `||` -- assert no accidental header promotion +- Cell containing `\` -- assert escaped +- Leading/trailing `|` in a cell + +## PR 3: Fix #18 -- Escaping test matrix + +### New file: `tests/test_escaping.py` + +Parametrized test matrix covering every `OutputFormat` with special characters. + +### Merge strategy + +PR 3 rebases onto main after PRs 1 and 2 are merged. Markdown/Notion/Jira/Confluence pipe tests are passing assertions (not xfails). + +## Out of scope + +- #15: HTML XSS (`_format_html` cell escaping) -- potential contributor +- #19: Slack `*` and backtick escaping -- medium priority, separate PR +- #20: `detect_content_type` false positives -- unrelated diff --git a/src/mcp_clipboard/parser.py b/src/mcp_clipboard/parser.py index 28929b4..970462c 100644 --- a/src/mcp_clipboard/parser.py +++ b/src/mcp_clipboard/parser.py @@ -356,6 +356,11 @@ def format_table(rows: list[list[str]], fmt: OutputFormat = "markdown") -> str: return _format_markdown(rows) +def _escape_md_cell(cell: str) -> str: + """Escape characters that break GFM pipe tables.""" + return cell.replace("\\", "\\\\").replace("|", "\\|") + + def _format_markdown(rows: list[list[str]]) -> str: """Render rows as a GitHub-flavored Markdown table.""" if not rows: @@ -365,9 +370,11 @@ def _format_markdown(rows: list[list[str]]) -> str: max_cols = max(len(row) for row in rows) normalized = [row + [""] * (max_cols - len(row)) for row in rows] - # Calculate column widths + # Escape cells, then calculate widths from escaped values + escaped = [[_escape_md_cell(cell) for cell in row] for row in normalized] + widths = [ - max(len(normalized[r][c]) for r in range(len(normalized))) + max(len(escaped[r][c]) for r in range(len(escaped))) for c in range(max_cols) ] # Minimum width of 3 for the separator @@ -376,7 +383,7 @@ def _format_markdown(rows: list[list[str]]) -> str: lines: list[str] = [] # Header header = "| " + " | ".join( - normalized[0][c].ljust(widths[c]) for c in range(max_cols) + escaped[0][c].ljust(widths[c]) for c in range(max_cols) ) + " |" lines.append(header) @@ -385,7 +392,7 @@ def _format_markdown(rows: list[list[str]]) -> str: lines.append(sep) # Data rows - for row in normalized[1:]: + for row in escaped[1:]: line = "| " + " | ".join( row[c].ljust(widths[c]) for c in range(max_cols) ) + " |" diff --git a/tests/test_parser.py b/tests/test_parser.py index f1f910b..c9d2128 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,5 +1,7 @@ """Tests for the HTML/TSV parser, formatters, and content detection.""" +import re + from mcp_clipboard.parser import ( detect_content_type, extract_html_text, @@ -464,3 +466,82 @@ def test_format_destination_ragged_rows(): for fmt in ("slack", "jira", "confluence", "html"): result = format_table(rows, fmt) assert result # non-empty + + +# --------------------------------------------------------------------------- +# Markdown/Notion escaping (#16) +# --------------------------------------------------------------------------- + + +def test_format_markdown_escapes_pipe_in_data(): + """Pipe in a data cell must not break column structure.""" + rows = [["Cmd", "Desc"], ["cat | grep", "filter"]] + result = format_table(rows, "markdown") + lines = result.strip().split("\n") + # Header + separator + 1 data row + assert len(lines) == 3 + # Every line must have the same number of unescaped pipe delimiters + # (escaped pipes \| do not count as delimiters) + # 2-column table: | col1 | col2 | = 3 unescaped pipes per line + for line in lines: + # Count unescaped pipes: pipes not preceded by backslash + delimiters = len(re.findall(r"(?