Skip to content

Commit

Permalink
Fix quadratic complexity parsing long backtick code spans with no matching closers
Browse files Browse the repository at this point in the history
  • Loading branch information
colinodell committed Dec 7, 2024
1 parent e1cfa8d commit 540d850
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
- Fixed quadratic complexity parsing emphasis and strikethrough delimiters
- Fixed issue where having 500,000+ delimiters could trigger a [known segmentation fault issue in PHP's garbage collection](https://bugs.php.net/bug.php?id=68606)
- Fixed quadratic complexity deactivating link openers
- Fixed quadratic complexity parsing long backtick code spans with no matching closers
- Fixed catastrophic backtracking when parsing link labels/titles

## [2.4.1] - 2023-08-30
Expand Down
72 changes: 67 additions & 5 deletions src/Extension/CommonMark/Parser/Inline/BacktickParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,27 @@

use League\CommonMark\Extension\CommonMark\Node\Inline\Code;
use League\CommonMark\Node\Inline\Text;
use League\CommonMark\Parser\Cursor;
use League\CommonMark\Parser\Inline\InlineParserInterface;
use League\CommonMark\Parser\Inline\InlineParserMatch;
use League\CommonMark\Parser\InlineParserContext;

final class BacktickParser implements InlineParserInterface
{
/**
* Max bound for backtick code span delimiters.
*
* Opening backtick runs longer than this are rejected by findMatchingTicks()
* without scanning the input for a closer.
*
* @see https://github.com/commonmark/cmark/commit/8ed5c9d
*/
private const MAX_BACKTICKS = 1000;

/**
* Weak reference to the cursor that the cached scan state below belongs to.
* When findMatchingTicks() is called with a different cursor, the cache is reset.
*
* @var \WeakReference<Cursor>|null
*/
private ?\WeakReference $lastCursor = null;
/** Set to true once a scan of the current cursor ran to the end of the input without finding a closer. */
private bool $lastCursorScanned = false;

/**
* Filled in while scanning: for each non-matching backtick run encountered,
* the cursor position after the run minus the run length is stored under that length.
*
* @var array<int, int> backtick count => position of known ender
*/
private array $seenBackticks = [];

public function getMatchDefinition(): InlineParserMatch
{
return InlineParserMatch::regex('`+');
Expand All @@ -38,11 +53,7 @@ public function parse(InlineParserContext $inlineContext): bool
$currentPosition = $cursor->getPosition();
$previousState = $cursor->saveState();

while ($matchingTicks = $cursor->match('/`+/m')) {
if ($matchingTicks !== $ticks) {
continue;
}

if ($this->findMatchingTicks(\strlen($ticks), $cursor)) {
$code = $cursor->getSubstring($currentPosition, $cursor->getPosition() - $currentPosition - \strlen($ticks));

$c = \preg_replace('/\n/m', ' ', $code) ?? '';
Expand All @@ -67,4 +78,55 @@ public function parse(InlineParserContext $inlineContext): bool

return true;
}

/**
 * Locates the matching closer for a backtick code span.
 *
 * Scans forward from the cursor's current position, one whole backtick run at a
 * time, until a run of exactly $openTickLength backticks is found. The cursor is
 * advanced by each match() call, so on success it is left wherever the final
 * match() placed it; the caller is responsible for restoring the cursor on failure.
 *
 * Leverages some caching to avoid traversing the same cursor multiple times when
 * we've already seen all the potential backtick closers: the position of every
 * non-matching run is remembered per run length, and once a scan has reached the
 * end of the input, later calls on the same cursor can bail out immediately when
 * the last known run of the requested length is not ahead of the cursor.
 *
 * Backtick runs are always consumed in full. A run longer than MAX_BACKTICKS is
 * therefore never split into smaller pieces, can never act as a closer, and is
 * never cached.
 *
 * @see https://github.com/commonmark/cmark/commit/8ed5c9d
 *
 * @param int    $openTickLength Number of backticks in the opening sequence
 * @param Cursor $cursor         Cursor to scan
 *
 * @return bool True if a matching closer was found, false otherwise
 */
private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool
{
    // Reset the cached scan state if this is a new cursor
    if ($this->lastCursor === null || $this->lastCursor->get() !== $cursor) {
        $this->seenBackticks     = [];
        $this->lastCursor        = \WeakReference::create($cursor);
        $this->lastCursorScanned = false;
    }

    // Openers beyond the bound are never paired with a closer
    if ($openTickLength > self::MAX_BACKTICKS) {
        return false;
    }

    // Return if we already know there's no closer ahead of the cursor
    if (
        $this->lastCursorScanned
        && isset($this->seenBackticks[$openTickLength])
        && $this->seenBackticks[$openTickLength] <= $cursor->getPosition()
    ) {
        return false;
    }

    // Match each run in its entirety. Using a bounded quantifier here would split
    // an over-long run into chunks, letting its tail masquerade as a shorter closer.
    while ($ticks = $cursor->match('/`+/m')) {
        $numTicks = \strlen($ticks);

        // Did we find the closer?
        if ($numTicks === $openTickLength) {
            return true;
        }

        // Remember where this run started; over-long runs can never close anything
        if ($numTicks <= self::MAX_BACKTICKS) {
            $this->seenBackticks[$numTicks] = $cursor->getPosition() - $numTicks;
        }
    }

    // Got through whole input without finding closer
    $this->lastCursorScanned = true;

    return false;
}
}

0 comments on commit 540d850

Please sign in to comment.