diff --git a/CHANGELOG.md b/CHANGELOG.md index e5c79e7f07..7024144cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -110,6 +110,7 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi - Fixed quadratic complexity parsing emphasis and strikethrough delimiters - Fixed issue where having 500,000+ delimiters could trigger a [known segmentation fault issue in PHP's garbage collection](https://bugs.php.net/bug.php?id=68606) - Fixed quadratic complexity deactivating link openers +- Fixed quadratic complexity parsing long backtick code spans with no matching closers - Fixed catastrophic backtracking when parsing link labels/titles ## [2.4.1] - 2023-08-30 diff --git a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php index 9618f2e676..3324fe39d0 100644 --- a/src/Extension/CommonMark/Parser/Inline/BacktickParser.php +++ b/src/Extension/CommonMark/Parser/Inline/BacktickParser.php @@ -18,12 +18,27 @@ use League\CommonMark\Extension\CommonMark\Node\Inline\Code; use League\CommonMark\Node\Inline\Text; +use League\CommonMark\Parser\Cursor; use League\CommonMark\Parser\Inline\InlineParserInterface; use League\CommonMark\Parser\Inline\InlineParserMatch; use League\CommonMark\Parser\InlineParserContext; final class BacktickParser implements InlineParserInterface { + /** + * Max bound for backtick code span delimiters. 
+ * + * @see https://github.com/commonmark/cmark/commit/8ed5c9d + */ + private const MAX_BACKTICKS = 1000; + + /** @var \WeakReference|null */ + private ?\WeakReference $lastCursor = null; + private bool $lastCursorScanned = false; + + /** @var array backtick count => position of known ender */ + private array $seenBackticks = []; + public function getMatchDefinition(): InlineParserMatch { return InlineParserMatch::regex('`+'); @@ -38,11 +53,7 @@ public function parse(InlineParserContext $inlineContext): bool $currentPosition = $cursor->getPosition(); $previousState = $cursor->saveState(); - while ($matchingTicks = $cursor->match('/`+/m')) { - if ($matchingTicks !== $ticks) { - continue; - } - + if ($this->findMatchingTicks(\strlen($ticks), $cursor)) { $code = $cursor->getSubstring($currentPosition, $cursor->getPosition() - $currentPosition - \strlen($ticks)); $c = \preg_replace('/\n/m', ' ', $code) ?? ''; @@ -67,4 +78,55 @@ public function parse(InlineParserContext $inlineContext): bool return true; } + + /** + * Locates the matching closer for a backtick code span. + * + * Leverages some caching to avoid traversing the same cursor multiple times when + * we've already seen all the potential backtick closers. 
+ * + * @see https://github.com/commonmark/cmark/commit/8ed5c9d + * + * @param int $openTickLength Number of backticks in the opening sequence + * @param Cursor $cursor Cursor to scan (its position moves as backtick runs are matched) + * + * @return bool True if a matching closer was found (the cursor then sits just past it), false otherwise + */ + private function findMatchingTicks(int $openTickLength, Cursor $cursor): bool + { + // Called with a different cursor than last time: reset the cached run positions and the fully-scanned flag + if ($this->lastCursor === null || $this->lastCursor->get() !== $cursor) { + $this->seenBackticks = []; + $this->lastCursor = \WeakReference::create($cursor); + $this->lastCursorScanned = false; + } + + if ($openTickLength > self::MAX_BACKTICKS) { + return false; + } + + // This cursor was already scanned to the end: if the most recently recorded run of this length starts at or before the current position, no closer lies ahead + if ($this->lastCursorScanned && isset($this->seenBackticks[$openTickLength]) && $this->seenBackticks[$openTickLength] <= $cursor->getPosition()) { + return false; + } + + while ($ticks = $cursor->match('/`{1,' . self::MAX_BACKTICKS . '}/m')) { + $numTicks = \strlen($ticks); + + // A run of exactly the opener's length is the closer + if ($numTicks === $openTickLength) { + return true; + } + + // Not the closer: remember where this run starts, keyed by its length (a later run of the same length overwrites the entry) + if ($numTicks <= self::MAX_BACKTICKS) { + $this->seenBackticks[$numTicks] = $cursor->getPosition() - $numTicks; + } + } + + // match() found no further backtick run: record that this cursor has been scanned to the end + $this->lastCursorScanned = true; + + return false; + } }