Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow \n literals in strings #21

Merged
merged 7 commits into from
Feb 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified bin/bench
100644 → 100755
Empty file.
Empty file modified bin/buildparser
100644 → 100755
Empty file.
Empty file modified bin/phpstan
100644 → 100755
Empty file.
Empty file modified bin/registerbin
100644 → 100755
Empty file.
Empty file modified bin/test
100644 → 100755
Empty file.
Empty file modified bin/test_smoke
100644 → 100755
Empty file.
Empty file modified bin/test_unit
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion example/bench_all.primi
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ xb_2 = b - i_2;
xc = c - j;
xc_k = c - k;
l = "xoxoxomilanobergamo,anno:domini. Anno, neštvi ma.".string_replace(r"ann?o", "FAIL");
m = "\ahoj\n\vo\le" - r"\\ahoj\s";
m = "\\ahoj\n\\vo\\le" - r"\\ahoj\s";
n = "a/b/c" - r"\/b"; // Test proper handling of escaped regex delimiters.
a = 4;
b = 5;
Expand Down
Empty file modified primi
100644 → 100755
Empty file.
5 changes: 4 additions & 1 deletion src/handlers/StringLiteral.php
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
<?php

declare(strict_types = 1);

namespace Smuuf\Primi\Handlers;

use \Smuuf\Primi\Structures\StringValue;
use \Smuuf\Primi\Structures\NumberValue;
use \Smuuf\Primi\Helpers\StringEscaping;
use \Smuuf\Primi\Context;

class StringLiteral extends \Smuuf\Primi\StrictObject implements IHandler {
Expand All @@ -16,6 +18,7 @@ public static function handle(array $node, Context $context) {
// Using trim("\"'", ...) would make "abc'" into abc instead of abc',
// so do this a little more directly.
$value = \mb_substr($content, 1, \mb_strlen($content) - 2);
$value = StringEscaping::unescapeString($value);

return new StringValue($value);

Expand Down
102 changes: 102 additions & 0 deletions src/helpers/StringEscaping.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<?php

declare(strict_types = 1);

namespace Smuuf\Primi\Helpers;

use \Smuuf\Primi\ErrorException;

class StringEscaping extends \Smuuf\Primi\StrictObject {

	/**
	 * Escape sequences that are supported in string literals.
	 *
	 * Each key is a complete escape sequence as it is written in source code
	 * (a backslash followed by a single character). Each value is the literal
	 * character that sequence expands to.
	 *
	 * @const
	 */
	private const ESCAPE_PAIRS = [
		'\\\\' => "\\",
		'\\n' => "\n",
		'\\t' => "\t",
		'\\"' => '"',
		"\\'" => "'",
	];

	/**
	 * Known quote characters.
	 *
	 * @const
	 */
	private const QUOTE_CHARS = ['"', "'"];

	/**
	 * Return the provided string with known escape sequences expanded to
	 * their literal meaning. For example `\n` will be expanded to a literal
	 * new-line.
	 *
	 * @param string $str String possibly containing escape sequences.
	 * @return string The string with every known escape sequence expanded.
	 * @throws ErrorException If a backslash is followed by a character that
	 * does not form a known escape sequence.
	 * @throws \RuntimeException If the regex engine itself fails.
	 */
	public static function unescapeString(string $str): string {

		// The pattern matches a backslash plus the single byte that follows.
		// Without the `s` modifier the dot does not match a new-line, so
		// a backslash directly followed by a line break is left untouched.
		// NOTE(review): confirm that leaving such a backslash as-is is intended.
		$result = \preg_replace_callback('#\\\\.#', static function(array $m): string {

			$sequence = $m[0];
			if (isset(self::ESCAPE_PAIRS[$sequence])) {
				return self::ESCAPE_PAIRS[$sequence];
			}

			// The backslashed character doesn't represent any known escape
			// sequence, therefore error.
			throw new ErrorException(
				"Unrecognized string escape sequence '{$sequence}'."
			);

		}, $str);

		// preg_replace_callback() returns null on an engine error. Report
		// that explicitly instead of failing on the return type.
		if ($result === null) {
			throw new \RuntimeException(
				'Unable to unescape string (PCRE error code ' . \preg_last_error() . ').'
			);
		}

		return $result;

	}

	/**
	 * Return the provided string with every character that has a known
	 * escape sequence replaced by that sequence. For example a literal
	 * new-line will be replaced by `\n`.
	 *
	 * This is useful for converting the internal string value of StringValue
	 * to a source code representation of it - that is, how a string literal
	 * would have to be written by hand for it to be interpreted as that
	 * original string.
	 *
	 * If the optional second argument $quoteChar is passed, only that quote
	 * character will be escaped and all other known quote characters will be
	 * left as they are. The only known quote characters are `'` and `"`. For
	 * example the string `hello'there"` without $quoteChar specified will
	 * be escaped as `hello\'there\"`. But with $quoteChar being `"` it would
	 * result in `hello'there\"`, since the caller specified that the
	 * single-quote does NOT have to be escaped.
	 *
	 * @param string $str Raw string value to be escaped.
	 * @param string|null $quoteChar The only quote character that needs to be
	 * escaped, or null to escape all known quote characters.
	 * @return string The escaped string (without any surrounding quotes).
	 */
	public static function escapeString(
		string $str,
		?string $quoteChar = null
	): string {

		// Build a "literal character => escape sequence" map and apply it in
		// a single pass. A single pass never re-escapes backslashes produced
		// by another replacement, so the result does not depend on the order
		// of entries in ESCAPE_PAIRS.
		$replacements = [];
		foreach (self::ESCAPE_PAIRS as $sequence => $char) {

			// $char = <new line>
			// $sequence = '\n'

			$isUnneededQuote = $quoteChar !== null
				&& \in_array($char, self::QUOTE_CHARS, true)
				&& $char !== $quoteChar;

			// Do not escape quote characters that aren't necessary to be
			// escaped.
			if ($isUnneededQuote) {
				continue;
			}

			$replacements[$char] = $sequence;

		}

		return \strtr($str, $replacements);

	}

}
4 changes: 2 additions & 2 deletions src/parser/CompiledParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ class CompiledParser extends Parser\Packrat {
/** @var string **/
public $string;

/* StringLiteral: / (?:".*?(?<!\\)")|(?:'.*?(?<!\\)') /s */
/* StringLiteral: / ("[^"\\]*(\\.[^"\\]*)*")|('[^'\\]*(\\.[^'\\]*)*') /s */
protected $match_StringLiteral_typestack = array('StringLiteral');
function match_StringLiteral ($stack = []) {
$matchrule = "StringLiteral"; $result = $this->construct($matchrule, $matchrule, \null);
if (( $subres = $this->rx( '/ (?:".*?(?<!\\\\)")|(?:\'.*?(?<!\\\\)\') /s' ) ) !== \false) {
if (( $subres = $this->rx( '/ ("[^"\\\\]*(\\\\.[^"\\\\]*)*")|(\'[^\'\\\\]*(\\\\.[^\'\\\\]*)*\') /s' ) ) !== \false) {
$result["text"] .= $subres;
return $this->finalise($result);
}
Expand Down
2 changes: 1 addition & 1 deletion src/parser/Grammar.peg
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class CompiledParser extends Parser\Packrat {

# Carefully crafted (or stolen somewhere?) string matching regex which supports
# both " and ' quotes, doesn't cause JIT stack overflow and also supports escaped quotes.
StringLiteral: / (?:".*?(?<!\\)")|(?:'.*?(?<!\\)') /s
StringLiteral: / ("[^"\\]*(\\.[^"\\]*)*")|('[^'\\]*(\\.[^'\\]*)*') /s
NumberLiteral: /-?\d+(\.\d+)?/
BoolLiteral: "true" | "false"
NullLiteral: "null"
Expand Down
10 changes: 7 additions & 3 deletions src/structures/StringValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Smuuf\Primi\Structures;

use \Smuuf\Primi\Helpers\Common;
use \Smuuf\Primi\Helpers\StringEscaping;
use \Smuuf\Primi\ISupportsComparison;
use \Smuuf\Primi\ISupportsAddition;
use \Smuuf\Primi\ISupportsSubtraction;
Expand All @@ -22,14 +23,14 @@ class StringValue extends Value implements
const TYPE = "string";

public function __construct(string $value) {
$this->value = self::expandSequences($value);
$this->value = $value;
}

public function getStringValue(): string {

// We are about to put double-quotes around the return value,
// so let's "escape" double-quotes present in the string value.
$escaped = \str_replace('"', '\"', $this->value);
$escaped = StringEscaping::escapeString($this->value, '"');
return "\"$escaped\"";

}
Expand Down Expand Up @@ -159,7 +160,10 @@ public function getIterator(): \Iterator {
protected static function expandSequences(string $string) {

// Primi strings support some escape sequences.
return \str_replace('\n', "\n", $string);
//return \str_replace('\n', "\n", $string);
//return \preg_replace('#(?<!\\\\)\\\\n#', "\n", $string);
$string = \preg_replace('#(?<!\\\\)\\\\n#', "\n", $string);
return \str_replace('\\\\', "\\", $string);

}

Expand Down
5 changes: 5 additions & 0 deletions tests/language/suites/extensions/ext.string.expect
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,8 @@ m:BoolValue:true
n:BoolValue:false
o:StringValue:"1,2,3"
p:StringValue:"1,nested,stuff-yeah,3"
esc_1:StringValue:"\n"
esc_2:StringValue:"\\n"
esc_3:StringValue:"\n\n"
esc_4:StringValue:"\\n\\n"
esc_5:StringValue:"\n\\n\t\\"
6 changes: 6 additions & 0 deletions tests/language/suites/extensions/ext.string.primi
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,9 @@ n = "ahoj ahoj ahoj!".contains(r"(čau ){2}!?");

o = ",".join([1, 2, 3]);
p = ",".join([1, ['nested', "stuff-{}".format("yeah")], 3]);

esc_1 = "\n"
esc_2 = "\\n"
esc_3 = "\n" + "\n"
esc_4 = "\\n" + "\\n"
esc_5 = "\n" + "\\n\t\\"
2 changes: 1 addition & 1 deletion tests/language/suites/structures/regexes.expect
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ xb_2:StringValue:"ab123dfg"
xc:StringValue:"xyzčaukomňauko"
xc_k:StringValue:"xyz456čako"
l:StringValue:"xoxoxomilFAILbergamo,FAIL:domini. Anno, neštvi ma."
m:StringValue:"\vo\le"
m:StringValue:"\\vo\\le"
n:StringValue:"a/c"
o:StringValue:"00011"
p:StringValue:"00011234455"
Expand Down
2 changes: 1 addition & 1 deletion tests/language/suites/structures/regexes.primi
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ xc = c - j;
xc_k = c - k;

l = "xoxoxomilanobergamo,anno:domini. Anno, neštvi ma.".string_replace(r"ann?o", "FAIL");
m = "\ahoj\n\vo\le" - r"\\ahoj\s";
m = "\\ahoj\n\\vo\\le" - r"\\ahoj\s";
n = "a/b/c" - r"/b"; // Test proper handling of escaped regex delimiters.

o = "[0-9]{5}".to_regex().match("abc00011234455");
Expand Down
2 changes: 0 additions & 2 deletions tests/unit/method.helper.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ use \Tester\Assert;
use \Smuuf\Primi\Helpers\Common;
use \Smuuf\Primi\Structures\StringValue;
use \Smuuf\Primi\Structures\Value;
use \Smuuf\Primi\Structures\RegexValue;
use \Smuuf\Primi\ErrorException;

require __DIR__ . '/../bootstrap.php';

Expand Down
28 changes: 23 additions & 5 deletions tests/unit/value.string.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,33 @@ $fns = ExtensionHub::get();
$string = new StringValue("this is a string.");
$letterA = new StringValue("a");
$unicode = new StringValue("ťhiš íš á ŠTřing.");
$withNewline = new StringValue('a \n b');

//
// Test sequence expanding...
// Escape sequences are NOT handled inside StringValue. They are expanded
// earlier, when a string literal from the source code is being handled.
//

Assert::same(2, count(explode("\n", get_val($withNewline))));
$stringsWithEscapeSequences = [
'a\nb',
'a \n b',
'a \\n b',
'a\\n\nb',
'a\\\n\t\\tb',
];
foreach ($stringsWithEscapeSequences as $s) {
Assert::same($s, get_val(new StringValue($s)));
}

// Get correct repr - things should be quoted and escaped properly.
// REMEMBER: Escape characters are NOT HANDLED when creating StringValue
// objects. Whatever is put into StringValue as argument will literally be
// what's inside.
Assert::same('"\""', (new StringValue('"'))->getStringValue());
Assert::same('"\\\\\""', (new StringValue('\\"'))->getStringValue());
Assert::same('"\\\\\'"', (new StringValue("\'"))->getStringValue());
Assert::same('"\\\\\\\\\'"', (new StringValue("\\\\'"))->getStringValue());
Assert::same('"\\\\n"', (new StringValue('\n'))->getStringValue());
Assert::same('"\\n"', (new StringValue("\n"))->getStringValue());

//
// Test adding and subtracting...
Expand Down Expand Up @@ -267,8 +287,6 @@ Assert::same(17, get_val($fns['string_length']->invoke([$string])));
Assert::same(1, get_val($fns['string_length']->invoke([$letterA])));
// Multibyte strings should report length correctly.
Assert::same(17, get_val($fns['string_length']->invoke([$unicode])));
// "\n" is expanded as newline - that's one character.
Assert::same(5, get_val($fns['string_length']->invoke([$withNewline])));

//
// Test replacing.
Expand Down