Skip to content

Commit 2bb93a6

Browse files
committed
use PCRE MARK verb to identify token types
1 parent 8d66350 commit 2bb93a6

File tree

4 files changed

+53
-25
lines changed

4 files changed

+53
-25
lines changed

Diff for: docker/php-5.x/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ RUN apt update && apt install -y --force-yes libonig-dev libzip-dev
55
RUN docker-php-ext-install mbstring zip
66

77
RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \
8-
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'c31c1e292ad7be5f49291169c0ac8f683499edddcfd4e42232982d0fd193004208a58ff6f353fde0012d35fdd72bc394') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
8+
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'dac665fdc30fdd8ec78b38b9800061b4150413ff2e3b6f88543c636f7cd84f6db9189d43a81e5503cda447da73c7e5b6') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
99
&& php composer-setup.php \
1010
&& php -r "unlink('composer-setup.php');" \
1111
&& mv composer.phar /usr/local/bin/composer

Diff for: docker/php/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ ARG PHP_VERSION=8.0
22
FROM php:$PHP_VERSION
33

44
RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');" \
5-
&& php -r "if (hash_file('sha384', 'composer-setup.php') === '906a84df04cea2aa72f40b5f787e49f22d4c2f19492ac310e8cba5b96ac8b64115ac402c8cd292b8a03482574915d1a8') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
5+
&& php -r "if (hash_file('sha384', 'composer-setup.php') === 'dac665fdc30fdd8ec78b38b9800061b4150413ff2e3b6f88543c636f7cd84f6db9189d43a81e5503cda447da73c7e5b6') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;" \
66
&& php composer-setup.php \
77
&& php -r "unlink('composer-setup.php');" \
88
&& mv composer.phar /usr/local/bin/composer

Diff for: src/Parser/RegularParser.php

+13-23
Original file line numberDiff line numberDiff line change
@@ -349,19 +349,9 @@ private function tokenize($text)
349349

350350
$tokens = array();
351351
$position = 0;
352-
353352
foreach($matches as $match) {
354-
switch(true) {
355-
case array_key_exists('close', $match): { $token = $match['close']; $type = self::TOKEN_CLOSE; break; }
356-
case array_key_exists('open', $match): { $token = $match['open']; $type = self::TOKEN_OPEN; break; }
357-
case array_key_exists('separator', $match): { $token = $match['separator']; $type = self::TOKEN_SEPARATOR; break; }
358-
case array_key_exists('delimiter', $match): { $token = $match['delimiter']; $type = self::TOKEN_DELIMITER; break; }
359-
case array_key_exists('marker', $match): { $token = $match['marker']; $type = self::TOKEN_MARKER; break; }
360-
case array_key_exists('ws', $match): { $token = $match['ws']; $type = self::TOKEN_WS; break; }
361-
case array_key_exists('string', $match): { $token = $match['string']; $type = self::TOKEN_STRING; break; }
362-
default: { throw new \RuntimeException('Invalid token.'); }
363-
}
364-
$tokens[] = array($type, $token, $position);
353+
$token = $match[0];
354+
$tokens[] = array((int)$match['MARK'], $token, $position);
365355
$position += mb_strlen($token, 'utf-8');
366356
}
367357

@@ -373,31 +363,31 @@ private function prepareLexer(SyntaxInterface $syntax)
373363
{
374364
// FIXME: for some reason Psalm does not understand the `@psalm-var callable() $var` annotation
375365
/** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
376-
$group = function($text, $group) {
377-
return '(?<'.(string)$group.'>'.preg_replace('/(.)/us', '\\\\$0', (string)$text).')';
366+
$group = function($text) {
367+
return preg_replace('/(.)/us', '\\\\$0', (string)$text);
378368
};
379369
/** @psalm-suppress MissingClosureParamType, MissingClosureReturnType */
380370
$quote = function($text) {
381371
return preg_replace('/(.)/us', '\\\\$0', (string)$text);
382372
};
383373

384374
$rules = array(
385-
'(?<string>\\\\.|(?:(?!'.implode('|', array(
375+
'\\\\.(*:'.self::TOKEN_STRING.')|(?:(?!'.implode('|', array(
386376
$quote($syntax->getOpeningTag()),
387377
$quote($syntax->getClosingTag()),
388378
$quote($syntax->getClosingTagMarker()),
389379
$quote($syntax->getParameterValueSeparator()),
390380
$quote($syntax->getParameterValueDelimiter()),
391381
'\s+',
392-
)).').)+)',
393-
'(?<ws>\s+)',
394-
$group($syntax->getClosingTagMarker(), 'marker'),
395-
$group($syntax->getParameterValueDelimiter(), 'delimiter'),
396-
$group($syntax->getParameterValueSeparator(), 'separator'),
397-
$group($syntax->getOpeningTag(), 'open'),
398-
$group($syntax->getClosingTag(), 'close'),
382+
)).').)+(*:'.self::TOKEN_STRING.')',
383+
'\s+(*:'.self::TOKEN_WS.')',
384+
$group($syntax->getClosingTagMarker()).'(*:'.self::TOKEN_MARKER.')',
385+
$group($syntax->getParameterValueDelimiter()).'(*:'.self::TOKEN_DELIMITER.')',
386+
$group($syntax->getParameterValueSeparator()).'(*:'.self::TOKEN_SEPARATOR.')',
387+
$group($syntax->getOpeningTag()).'(*:'.self::TOKEN_OPEN.')',
388+
$group($syntax->getClosingTag()).'(*:'.self::TOKEN_CLOSE.')',
399389
);
400390

401-
return '~('.implode('|', $rules).')~us';
391+
return '~(?|'.implode('|', $rules).')~us';
402392
}
403393
}

Diff for: tests/ParserTest.php

+38
Original file line numberDiff line numberDiff line change
@@ -318,4 +318,42 @@ public function testInstances()
318318
static::assertInstanceOf('Thunder\Shortcode\Parser\WordPressParser', new WordpressParser());
319319
static::assertInstanceOf('Thunder\Shortcode\Parser\RegularParser', new RegularParser());
320320
}
321+
322+
/**
 * Parses a generated shortcode text with RegularParser and checks that parsing
 * finishes in under 30 milliseconds and yields exactly one top-level result.
 *
 * @dataProvider provideBenchmarks
 *
 * @param int $level forwarded to benchmark() as the highest shortcode level
 * @param int $num forwarded to benchmark() as the per-level repetition count
 */
public function testGeneratedNesting($level, $num)
{
    $input = $this->benchmark($level, $num);
    $parser = new RegularParser();

    // only the parse() call itself is measured, input generation is excluded
    $startedAt = microtime(true);
    $shortcodes = $parser->parse($input);
    $elapsedMs = (microtime(true) - $startedAt) * 1000;

    self::assertLessThan(30, $elapsedMs);
    self::assertCount(1, $shortcodes);
}
335+
336+
/**
 * Builds the shortcode text used as parser input by testGeneratedNesting().
 *
 * For every level N from 1 up to $level the opening tag `[sN]` followed by N
 * is appended $num times; then for every level N from $level down to 1 the
 * value N followed by the closing tag `[/sN]` is appended $num times.
 *
 * @param int $level highest shortcode level to generate
 * @param int $num how many times each opening and closing tag is repeated
 *
 * @return string generated text, empty when $level is lower than 1
 */
private function benchmark($level, $num)
{
    // explicit initial value keeps the result defined when no loop iteration runs
    $text = '';
    for($i = 1; $i <= $level; $i++) {
        // append so that every level contributes its opening tags to the result
        $text .= str_repeat('[s'.$i.']'.$i, $num);
    }
    for($i = $level; $i >= 1; $i--) {
        $text .= str_repeat($i.'[/s'.$i.']', $num);
    }

    return $text;
}
347+
348+
/**
 * Data provider for testGeneratedNesting().
 *
 * Produces every combination of level 10, 20, ..., 100 with repetition
 * count 1, 2, ..., 10, which gives 100 datasets ordered by level first.
 *
 * @return array list of array($level, $num) pairs
 */
public static function provideBenchmarks()
{
    $datasets = array();

    for($level = 10; $level <= 100; $level += 10) {
        for($num = 1; $num <= 10; $num++) {
            $datasets[] = array($level, $num);
        }
    }

    return $datasets;
}
321359
}

0 commit comments

Comments
 (0)