Skip to content

Commit

Permalink
Implement verbatim string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
benley committed Nov 25, 2016
1 parent 8db69ed commit dd23c88
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 2 deletions.
2 changes: 1 addition & 1 deletion core/ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ struct LiteralNumber : public AST {
/** Represents JSON strings. */
struct LiteralString : public AST {
String value;
enum TokenKind { SINGLE, DOUBLE, BLOCK };
enum TokenKind { SINGLE, DOUBLE, BLOCK, VERBATIM_SINGLE, VERBATIM_DOUBLE };
TokenKind tokenKind;
std::string blockIndent; // Only contains ' ' and '\t'.
std::string blockTermIndent; // Only contains ' ' and '\t'.
Expand Down
4 changes: 3 additions & 1 deletion core/desugarer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,9 @@ class Desugarer {
// Nothing to do.

} else if (auto *ast = dynamic_cast<LiteralString*>(ast_)) {
if (ast->tokenKind != LiteralString::BLOCK) {
if ((ast->tokenKind != LiteralString::BLOCK) &&
(ast->tokenKind != LiteralString::VERBATIM_DOUBLE) &&
(ast->tokenKind != LiteralString::VERBATIM_SINGLE)) {
ast->value = jsonnet_string_unescape(ast->location, ast->value);
}
ast->tokenKind = LiteralString::DOUBLE;
Expand Down
26 changes: 26 additions & 0 deletions core/formatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,30 @@ class Unparser {
}
}
o << ast->blockTermIndent << "|||";
} else if (ast->tokenKind == LiteralString::VERBATIM_DOUBLE) {
o << "@\"";
for (const char32_t *cp = ast->value.c_str() ; *cp != U'\0' ; ++cp) {
if (*cp == U'"') {
o << "\"\"";
} else {
std::string utf8;
encode_utf8(*cp, utf8);
o << utf8;
}
}
o << "\"";
} else if (ast->tokenKind == LiteralString::VERBATIM_SINGLE) {
o << "@'";
for (const char32_t *cp = ast->value.c_str() ; *cp != U'\0' ; ++cp) {
if (*cp == U'\'') {
o << "''";
} else {
std::string utf8;
encode_utf8(*cp, utf8);
o << utf8;
}
}
o << "'";
}

} else if (dynamic_cast<const LiteralNull*>(ast_)) {
Expand Down Expand Up @@ -613,6 +637,8 @@ class EnforceStringStyle : public FmtPass {
void visit(LiteralString *lit)
{
if (lit->tokenKind == LiteralString::BLOCK) return;
if (lit->tokenKind == LiteralString::VERBATIM_DOUBLE) return;
if (lit->tokenKind == LiteralString::VERBATIM_SINGLE) return;
String canonical = jsonnet_string_unescape(lit->location, lit->value);
unsigned num_single = 0, num_double = 0;
for (char32_t c : canonical) {
Expand Down
32 changes: 32 additions & 0 deletions core/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,38 @@ Tokens jsonnet_lex(const std::string &filename, const char *input)
}
break;

// Verbatim string literals: @"..." or @'...'.
// Backslashes are not treated specially in this branch; every character between
// the quotes is appended to the token data as-is.  The only special sequence is
// the quote character written twice, which contributes a single quote character
// to the data instead of terminating the literal.
case '@': {
    c++;  // Step past the '@'.
    if (*c == '\0') {
        // The input ended immediately after '@'.  Say so in words rather than
        // streaming the NUL terminator into the error message.
        throw StaticError(filename, begin,
                          "Couldn't lex verbatim string, junk after '@': end of file");
    }
    if (*c != '"' && *c != '\'') {
        std::stringstream ss;
        ss << "Couldn't lex verbatim string, junk after '@': " << *c;
        throw StaticError(filename, begin, ss.str());
    }
    const char quot = *c;  // Whichever quote opened the literal must also close it.
    c++;  // Advance beyond the opening quote.
    // NOTE(review): raw newlines inside the literal are copied into data here;
    // confirm whether the enclosing lexer's line bookkeeping needs updating for
    // multi-line verbatim strings.
    for (; ; ++c) {
        if (*c == '\0') {
            throw StaticError(filename, begin, "Unterminated verbatim string");
        }
        if (*c == quot) {
            if (*(c+1) == quot) {
                // Doubled quote: skip the first one so that exactly one quote
                // character is appended below.
                c++;
            } else {
                // Lone quote: end of the literal.
                break;
            }
        }
        data += *c;
    }
    c++;  // Advance beyond the closing quote.
    if (quot == '"') {
        kind = Token::VERBATIM_STRING_DOUBLE;
    } else {
        kind = Token::VERBATIM_STRING_SINGLE;
    }
}
break;

// Keywords
default:
if (is_identifier_first(*c)) {
Expand Down
4 changes: 4 additions & 0 deletions core/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ struct Token {
STRING_DOUBLE,
STRING_SINGLE,
STRING_BLOCK,
VERBATIM_STRING_SINGLE,
VERBATIM_STRING_DOUBLE,

// Keywords
ASSERT,
Expand Down Expand Up @@ -176,6 +178,8 @@ struct Token {
case OPERATOR: return "OPERATOR";
case STRING_SINGLE: return "STRING_SINGLE";
case STRING_DOUBLE: return "STRING_DOUBLE";
case VERBATIM_STRING_SINGLE: return "VERBATIM_STRING_SINGLE";
case VERBATIM_STRING_DOUBLE: return "VERBATIM_STRING_DOUBLE";
case STRING_BLOCK: return "STRING_BLOCK";

case ASSERT: return "assert";
Expand Down
32 changes: 32 additions & 0 deletions core/lexer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,38 @@ TEST(Lexer, TestSingleStrings)
"'hi", {}, "single string 'hi:1:1: Unterminated string");
}

// Lexing of @"..." literals: the contents are taken as-is (newlines and
// backslashes included), and a doubled "" inside the literal yields one ".
TEST(Lexer, TestVerbatimDoubleStrings)
{
    struct VerbatimCase {
        const char *name;      // Test label handed to testLex.
        const char *input;     // Source text to lex.
        const char *expected;  // Data of the single token expected back.
    };
    const VerbatimCase cases[] = {
        {"verbatim double string @\"hi\"", "@\"hi\"", "hi"},
        {"verbatim double string @\"hi nl\"", "@\"hi\n\"", "hi\n"},
        {"verbatim double string @\"hi\\\"", "@\"hi\\\"", "hi\\"},
        {"verbatim double string @\"hi\\\\\"", "@\"hi\\\\\"", "hi\\\\"},
        {"verbatim double string @\"hi\"\"\"", "@\"hi\"\"\"", "hi\""},
        {"verbatim double string @\"\"\"hi\"", "@\"\"\"hi\"", "\"hi"},
    };
    for (const VerbatimCase &tc : cases) {
        testLex(tc.name, tc.input,
                {Token(Token::Kind::VERBATIM_STRING_DOUBLE, tc.expected)}, "");
    }
}

// Lexing of @'...' literals: the contents are taken as-is (newlines and
// backslashes included), and a doubled '' inside the literal yields one '.
TEST(Lexer, TestVerbatimSingleStrings)
{
    struct VerbatimCase {
        const char *name;      // Test label handed to testLex.
        const char *input;     // Source text to lex.
        const char *expected;  // Data of the single token expected back.
    };
    const VerbatimCase cases[] = {
        {"verbatim single string @'hi'", "@'hi'", "hi"},
        {"verbatim single string @'hi nl'", "@'hi\n'", "hi\n"},
        {"verbatim single string @'hi\\'", "@'hi\\'", "hi\\"},
        {"verbatim single string @'hi\\\\'", "@'hi\\\\'", "hi\\\\"},
        {"verbatim single string @'hi'''", "@'hi'''", "hi'"},
        {"verbatim single string @'''hi'", "@'''hi'", "'hi"},
    };
    for (const VerbatimCase &tc : cases) {
        testLex(tc.name, tc.input,
                {Token(Token::Kind::VERBATIM_STRING_SINGLE, tc.expected)}, "");
    }
}

TEST(Lexer, TestBlockStringSpaces)
{
const char str[] =
Expand Down
7 changes: 7 additions & 0 deletions core/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,13 @@ class Parser {
return alloc->make<LiteralString>(
span(tok), tok.fodder, tok.data32(), LiteralString::BLOCK,
tok.stringBlockIndent, tok.stringBlockTermIndent);
case Token::VERBATIM_STRING_SINGLE:
return alloc->make<LiteralString>(
span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", "");
case Token::VERBATIM_STRING_DOUBLE:
return alloc->make<LiteralString>(
span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", "");


case Token::FALSE:
return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false);
Expand Down
1 change: 1 addition & 0 deletions test_suite/stdlib.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ std.assertEqual(
"[empty]\n[s1]\nx = 11\ny = 22\nz = 33\n[s2]\np = yes\nq = \n") &&

std.assertEqual(std.escapeStringJson("hello"), "\"hello\"") &&
std.assertEqual(std.escapeStringJson("hello"), @'"hello"') &&
std.assertEqual(std.escapeStringJson("he\"llo"), "\"he\\\"llo\"") &&
std.assertEqual(std.escapeStringJson("he\"llo"), "\"he\\\"llo\"") &&
std.assertEqual(std.escapeStringBash("he\"l'lo"), "'he\"l'\"'\"'lo'") &&
Expand Down
2 changes: 2 additions & 0 deletions test_suite/unparse.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ limitations under the License.
zero: 0,
string: "'foo\n bar\n\n\"bar\u0005\"\'\t \u0050\b\f\r\\",
string2: '"foo\n bar\n\n\'bar\u0005\"\'\t \u0050\b\f\r\\',
string3: @'"foo\n bar\n\n''bar\u0005\"''\t \u0050\b\f\r\\',
string4: @"'foo\n bar\n\n'bar\u0005""'\t \u0050\b\f\r\\",
"lit_field1": 1,
'lit_field2': 1,
"false": false,
Expand Down
2 changes: 2 additions & 0 deletions test_suite/unparse.jsonnet.fmt.golden
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ limitations under the License.
zero: 0,
string: "'foo\n bar\n\n\"bar\u0005\"\'\t \u0050\b\f\r\\",
string2: '"foo\n bar\n\n\'bar\u0005\"\'\t \u0050\b\f\r\\',
string3: @'"foo\n bar\n\n''bar\u0005\"''\t \u0050\b\f\r\\',
string4: @"'foo\n bar\n\n'bar\u0005""'\t \u0050\b\f\r\\",
lit_field1: 1,
lit_field2: 1,
"false": false,
Expand Down
2 changes: 2 additions & 0 deletions test_suite/unparse.jsonnet.golden
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"small_number": 1e-14,
"string": "'foo\n bar\n\n\"bar\u0005\"'\t P\b\f\r\\",
"string2": "\"foo\n bar\n\n'bar\u0005\"'\t P\b\f\r\\",
"string3": "\"foo\\n bar\\n\\n'bar\\u0005\\\"'\\t \\u0050\\b\\f\\r\\\\",
"string4": "'foo\\n bar\\n\\n'bar\\u0005\"'\\t \\u0050\\b\\f\\r\\\\",
"true": true,
"with\"quote": "\"",
"zero": 0
Expand Down

0 comments on commit dd23c88

Please sign in to comment.