Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions include/wabt/wast-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class WastLexer {

private:
static const int kEof = -1;
enum class CharClass { Reserved = 1, Keyword = 2, HexDigit = 4, Digit = 8 };
enum class CharClass { IdChar = 1, Keyword = 2, HexDigit = 4, Digit = 8 };

Location GetLocation();
std::string_view GetText(size_t offset = 0);
Expand All @@ -76,12 +76,16 @@ class WastLexer {
static bool IsDigit(int c) { return IsCharClass(c, CharClass::Digit); }
static bool IsHexDigit(int c) { return IsCharClass(c, CharClass::HexDigit); }
static bool IsKeyword(int c) { return IsCharClass(c, CharClass::Keyword); }
static bool IsReserved(int c) { return IsCharClass(c, CharClass::Reserved); }
static bool IsIdChar(int c) { return IsCharClass(c, CharClass::IdChar); }

bool ReadNum();
bool ReadHexNum();
int ReadReservedChars();
bool NoTrailingReservedChars() { return ReadReservedChars() == 0; }

enum class ReservedChars { None, Some, Id };
ReservedChars ReadReservedChars();
bool NoTrailingReservedChars() {
return ReadReservedChars() == ReservedChars::None;
}
void ReadSign();
Token GetStringToken(WastParser*);
Token GetNumberToken(TokenType);
Expand Down
43 changes: 30 additions & 13 deletions src/wast-lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
#include "wabt/lexer-source.h"
#include "wabt/wast-parser.h"

#define ERROR(...) parser->Error(GetLocation(), __VA_ARGS__)
// Reports a lexer error through `parser` when one is available. `parser` may
// be null (e.g. when called via GetStringToken(nullptr)), in which case the
// error is silently dropped.
// The body is wrapped in do/while(0) so the macro expands to exactly one
// statement: a bare `if` here would capture a following `else` at the call
// site (`if (c) ERROR(...); else ...`).
#define ERROR(...)                               \
  do {                                           \
    if (parser) {                                \
      parser->Error(GetLocation(), __VA_ARGS__); \
    }                                            \
  } while (0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the change in tokenization, ReadReservedChars() now needs to be able to call GetStringToken() if it sees a " character. However, GetStringToken() takes a WastParser* named parser so that it can use the ERROR macro, whereas ReadReservedChars() is called in a whole bunch of places and takes no arguments. So this change makes it possible for ReadReservedChars() to call GetStringToken(nullptr) without crashing if GetStringToken() wants to log an error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But won't this mean that such errors can/will be lost?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, now that strings can be part of a reserved token, we would lose the log message if an invalid string is found while running NoTrailingReservedChars(). I think the best/cleanest fix for this is #2013, which makes the WastLexer work similarly to a WastParser: it receives an Errors* on construction and stores it, so it can log errors from any context.


namespace wabt {

Expand Down Expand Up @@ -165,7 +167,7 @@ Token WastLexer::GetToken(WastParser* parser) {
default:
if (IsKeyword(PeekChar())) {
return GetKeywordToken();
} else if (IsReserved(PeekChar())) {
} else if (IsIdChar(PeekChar())) {
return GetReservedToken();
} else {
ReadChar();
Expand Down Expand Up @@ -310,6 +312,10 @@ Token WastLexer::GetStringToken(WastParser* parser) {
continue;

case '"':
if (PeekChar() == '"') {
ERROR("invalid string token");
has_error = true;
}
in_string = false;
break;

Expand Down Expand Up @@ -417,13 +423,13 @@ bool WastLexer::IsCharClass(int c, CharClass bit) {
// def IsDigit(c): return Range(c, '0', '9')
// def IsHexDigit(c): return IsDigit(c) or Range(c.lower(), 'a', 'f')
// def IsKeyword(c): return Range(c, 'a', 'z')
// def IsReserved(c): return Range(c, '!', '~') and c not in '"(),;[]{}'
// def IsIdChar(c): return Range(c, '!', '~') and c not in '"(),;[]{}'
//
// print ([0] + [
// (8 if IsDigit(c) else 0) |
// (4 if IsHexDigit(c) else 0) |
// (2 if IsKeyword(c) else 0) |
// (1 if IsReserved(c) else 0)
// (1 if IsIdChar(c) else 0)
// for c in map(chr, range(0, 127))
// ])
static const char kCharClasses[257] = {
Expand Down Expand Up @@ -456,13 +462,23 @@ bool WastLexer::ReadHexNum() {
return false;
}

int WastLexer::ReadReservedChars() {
int count = 0;
while (IsReserved(PeekChar())) {
ReadChar();
++count;
WastLexer::ReservedChars WastLexer::ReadReservedChars() {
ReservedChars ret{ReservedChars::None};
while (true) {
auto peek = PeekChar();
if (IsIdChar(peek)) {
ReadChar();
if (ret == ReservedChars::None) {
ret = ReservedChars::Id;
}
} else if (peek == '"') {
GetStringToken(nullptr);
ret = ReservedChars::Some;
} else {
break;
}
}
return count;
return ret;
}

void WastLexer::ReadSign() {
Expand Down Expand Up @@ -562,10 +578,11 @@ Token WastLexer::GetNameEqNumToken(std::string_view name,

Token WastLexer::GetIdToken() {
ReadChar();
if (NoTrailingReservedChars()) {
return TextToken(TokenType::Reserved);
if (ReadReservedChars() == ReservedChars::Id) {
return TextToken(TokenType::Var);
}
return TextToken(TokenType::Var);

return TextToken(TokenType::Reserved);
}

Token WastLexer::GetKeywordToken() {
Expand Down
113 changes: 113 additions & 0 deletions test/spec/tokens.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
;;; TOOL: run-interp-spec
;;; STDIN_FILE: third_party/testsuite/tokens.wast
(;; STDOUT ;;;
out/test/spec/tokens.wast:74: assert_malformed passed:
out/test/spec/tokens/tokens.17.wat:1:41: error: unexpected token "0$l", expected a var (e.g. 12 or $foo).
(func (block $l (i32.const 0) (br_table 0$l)))
^^^
out/test/spec/tokens.wast:84: assert_malformed passed:
out/test/spec/tokens/tokens.19.wat:1:41: error: undefined label variable "$l0"
(func (block $l (i32.const 0) (br_table $l0)))
^^^
out/test/spec/tokens.wast:94: assert_malformed passed:
out/test/spec/tokens/tokens.21.wat:1:41: error: undefined label variable "$l$l"
(func (block $l (i32.const 0) (br_table $l$l)))
^^^^
out/test/spec/tokens.wast:114: assert_malformed passed:
out/test/spec/tokens/tokens.25.wat:1:2: error: unexpected token "data"a"", expected a module field or a module.
(data"a")
^^^^^^^
out/test/spec/tokens/tokens.25.wat:1:9: error: unexpected token ), expected EOF.
(data"a")
^
out/test/spec/tokens.wast:124: assert_malformed passed:
out/test/spec/tokens/tokens.27.wat:1:7: error: unexpected token $l"a", expected ).
(data $l"a")
^^^^^
out/test/spec/tokens.wast:134: assert_malformed passed:
out/test/spec/tokens/tokens.29.wat:1:7: error: unexpected token $l" a", expected ).
(data $l" a")
^^^^^^
out/test/spec/tokens.wast:144: assert_malformed passed:
out/test/spec/tokens/tokens.31.wat:1:7: error: unexpected token $l"a ", expected ).
(data $l"a ")
^^^^^^
out/test/spec/tokens.wast:154: assert_malformed passed:
out/test/spec/tokens/tokens.33.wat:1:7: error: unexpected token $l"a ""b", expected ).
(data $l"a ""b")
^^^^^^^^^
out/test/spec/tokens.wast:164: assert_malformed passed:
out/test/spec/tokens/tokens.35.wat:1:7: error: unexpected token $l"", expected ).
(data $l"")
^^^^^^^^^^
out/test/spec/tokens.wast:174: assert_malformed passed:
out/test/spec/tokens/tokens.37.wat:1:7: error: unexpected token $l" ", expected ).
(data $l" ")
^^^^^^^^^^^
out/test/spec/tokens.wast:184: assert_malformed passed:
out/test/spec/tokens/tokens.39.wat:1:7: error: unexpected token $l" ", expected ).
(data $l" ")
^^^^^^^^^^^
out/test/spec/tokens.wast:194: assert_malformed passed:
out/test/spec/tokens/tokens.41.wat:1:7: error: invalid string token
(data "a""b")
^^^
out/test/spec/tokens/tokens.41.wat:1:7: error: unexpected token Invalid, expected ).
(data "a""b")
^^^
out/test/spec/tokens.wast:204: assert_malformed passed:
out/test/spec/tokens/tokens.43.wat:1:7: error: invalid string token
(data "a"" b")
^^^
out/test/spec/tokens/tokens.43.wat:1:7: error: unexpected token Invalid, expected ).
(data "a"" b")
^^^
out/test/spec/tokens.wast:214: assert_malformed passed:
out/test/spec/tokens/tokens.45.wat:1:7: error: invalid string token
(data "a ""b")
^^^^
out/test/spec/tokens/tokens.45.wat:1:7: error: unexpected token Invalid, expected ).
(data "a ""b")
^^^^
out/test/spec/tokens.wast:224: assert_malformed passed:
out/test/spec/tokens/tokens.47.wat:1:7: error: invalid string token
(data """")
^^^^^^^^
out/test/spec/tokens/tokens.47.wat:1:7: error: unexpected token Invalid, expected ).
(data """")
^^^^^^^^
out/test/spec/tokens.wast:234: assert_malformed passed:
out/test/spec/tokens/tokens.49.wat:1:7: error: invalid string token
(data """ ")
^^^^^^^^
out/test/spec/tokens/tokens.49.wat:1:7: error: unexpected token Invalid, expected ).
(data """ ")
^^^^^^^^
out/test/spec/tokens.wast:244: assert_malformed passed:
out/test/spec/tokens/tokens.51.wat:1:7: error: invalid string token
(data " """)
^^^^^^^^^
out/test/spec/tokens/tokens.51.wat:1:7: error: unexpected token Invalid, expected ).
(data " """)
^^^^^^^^^
out/test/spec/tokens.wast:252: assert_malformed passed:
out/test/spec/tokens/tokens.52.wat:1:7: error: unexpected token "a", expected ).
(func "a"x)
^^^
out/test/spec/tokens/tokens.52.wat:1:10: error: unexpected token x.
(func "a"x)
^
out/test/spec/tokens.wast:258: assert_malformed passed:
out/test/spec/tokens/tokens.53.wat:1:7: error: unexpected token "a", expected ).
(func "a"0)
^^^
out/test/spec/tokens.wast:264: assert_malformed passed:
out/test/spec/tokens/tokens.54.wat:1:7: error: unexpected token 0"a", expected ).
(func 0"a")
^^^^
out/test/spec/tokens.wast:270: assert_malformed passed:
out/test/spec/tokens/tokens.55.wat:1:7: error: unexpected token "a", expected ).
(func "a"$x)
^^^
56/56 tests passed.
;;; STDOUT ;;)
5 changes: 5 additions & 0 deletions test/wasm2c/spec/tokens.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
;;; TOOL: run-spec-wasm2c
;;; STDIN_FILE: third_party/testsuite/tokens.wast
(;; STDOUT ;;;
0/0 tests passed.
;;; STDOUT ;;)