Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/wasm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set(wasm_SOURCES
wasm-stack.cpp
wasm-type.cpp
wasm-validator.cpp
wat-lexer.cpp
${wasm_HEADERS}
)
# wasm-debug.cpp includes LLVM header using std::iterator (deprecated in C++17)
Expand Down
341 changes: 100 additions & 241 deletions src/wasm/wat-parser-internal.h → src/wasm/wat-lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,6 @@
* limitations under the License.
*/

// Usage note
// ----------
//
// This parser is a work in progress and this file should not yet be included
// anywhere except for in its own tests. Once the parser is usable, we will add
// wat-parser.h to declare the public parsing API and wat-parser.cpp to
// implement the public parsing functions in terms of the private API in this
// header. The private API will stay in this header rather than moving to
// wat-parser.cpp so that we can continue to unit test it.

#include <cassert>
#include <cctype>
#include <cmath>
Expand All @@ -32,6 +22,8 @@
#include <sstream>
#include <variant>

#include "wat-lexer.h"

using namespace std::string_view_literals;

namespace wasm::WATParser {
Expand Down Expand Up @@ -106,8 +98,6 @@ struct LexCtx {
// Mark the whole input as lexed: afterwards `lexedSize` equals the total
// length of `input`.
void takeAll() {
  lexedSize = input.size();
}
};

// Tag distinguishing unsigned from signed values. This is a plain (unscoped)
// enum, so the enumerators are visible in the enclosing scope and keep their
// implicit values: Unsigned == 0, Signed == 1.
enum Signedness {
  Unsigned,
  Signed,
};

// Policy tag selecting between rejecting and ignoring overflow. This is a
// plain (unscoped) enum with implicit values: DisallowOverflow == 0,
// IgnoreOverflow == 1.
enum OverflowBehavior {
  DisallowOverflow,
  IgnoreOverflow,
};

std::optional<int> getDigit(char c) {
Expand Down Expand Up @@ -786,258 +776,127 @@ std::optional<LexResult> keyword(std::string_view in) {
return ctx.lexed();
}

// ======
// Tokens
// ======

// Token for an opening parenthesis. It carries no payload, so any two
// instances compare equal.
struct LParenTok {
  friend bool operator==(const LParenTok&, const LParenTok&) { return true; }

  // Prints the token as a single-quoted paren: '('
  friend std::ostream& operator<<(std::ostream& os, const LParenTok&) {
    os << "'('";
    return os;
  }
};

// Token for a closing parenthesis. It carries no payload, so any two
// instances compare equal.
struct RParenTok {
  friend bool operator==(const RParenTok&, const RParenTok&) { return true; }

  // Prints the token as a single-quoted paren: ')'
  friend std::ostream& operator<<(std::ostream& os, const RParenTok&) {
    os << "')'";
    return os;
  }
};

// Token for an integer literal: a 64-bit magnitude `n` plus a `signedness`
// tag.
struct IntTok {
  uint64_t n;
  Signedness signedness;

  // Equal when both the value and the signedness tag match.
  friend bool operator==(const IntTok& t1, const IntTok& t2) {
    if (t1.n != t2.n) {
      return false;
    }
    return t1.signedness == t2.signedness;
  }

  // Prints the value followed by " signed" or " unsigned".
  friend std::ostream& operator<<(std::ostream& os, const IntTok& tok) {
    os << tok.n;
    if (tok.signedness == Signed) {
      os << " signed";
    } else {
      os << " unsigned";
    }
    return os;
  }
};

struct FloatTok {
// The payload if we lexed a nan with payload. We cannot store the payload
// directly in `d` because we do not know at this point whether we are parsing
// an f32 or f64 and therefore we do not know what the allowable payloads are.
std::optional<uint64_t> nanPayload;
double d;

// Print the token. NaNs are written as an explicit sign followed by "nan"
// and, when a payload was lexed, ":0x" plus the payload in hexadecimal (the
// stream is switched back to decimal afterwards). Every other value uses the
// stream's normal formatting for double.
friend std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
  if (std::isnan(tok.d)) {
    // std::signbit is true for negative values, so a set sign bit must print
    // "-" and a clear one "+".
    os << (std::signbit(tok.d) ? "-" : "+");
    if (tok.nanPayload) {
      return os << "nan:0x" << std::hex << *tok.nanPayload << std::dec;
    }
    return os << "nan";
  }
  return os << tok.d;
}
} // anonymous namespace

friend bool operator==(const FloatTok& t1, const FloatTok& t2) {
return std::signbit(t1.d) == std::signbit(t2.d) &&
(t1.d == t2.d || (std::isnan(t1.d) && std::isnan(t2.d) &&
t1.nanPayload == t2.nanPayload));
void Lexer::skipSpace() {
if (auto ctx = space(next())) {
index += ctx->span.size();
}
};
}

struct IdTok {
friend std::ostream& operator<<(std::ostream& os, const IdTok&) {
return os << "id";
void Lexer::lexToken() {
// TODO: Ensure we're getting the longest possible match.
Token tok;
if (auto t = lparen(next())) {
tok = Token{t->span, LParenTok{}};
} else if (auto t = rparen(next())) {
tok = Token{t->span, RParenTok{}};
} else if (auto t = ident(next())) {
tok = Token{t->span, IdTok{}};
} else if (auto t = integer(next())) {
tok = Token{t->span, IntTok{t->n, t->signedness}};
} else if (auto t = float_(next())) {
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
} else if (auto t = str(next())) {
tok = Token{t->span, StringTok{t->str}};
} else if (auto t = keyword(next())) {
tok = Token{t->span, KeywordTok{}};
} else {
// TODO: Do something about lexing errors.
curr = std::nullopt;
return;
}
index += tok.span.size();
curr = {tok};
}

// Identifier tokens carry no payload here, so any two compare equal.
friend bool operator==(const IdTok&, const IdTok&) {
  return true;
}
};

struct StringTok {
std::optional<std::string> str;

friend std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
if (tok.str) {
os << '"' << *tok.str << '"';
TextPos Lexer::position(const char* c) {
assert(size_t(c - buffer.data()) < buffer.size());
TextPos pos{1, 0};
for (const char* p = buffer.data(); p != c; ++p) {
if (*p == '\n') {
pos.line++;
pos.col = 0;
} else {
os << "(raw string)";
pos.col++;
}
return os;
}

// String tokens are equal when their optional contents are equal, which
// includes the case where neither holds a string.
friend bool operator==(const StringTok& t1, const StringTok& t2) {
  const auto& lhs = t1.str;
  const auto& rhs = t2.str;
  return lhs == rhs;
}
};

// Token for a keyword. It carries no payload (the text is available through
// the enclosing token's span), so any two instances compare equal.
struct KeywordTok {
  friend bool operator==(const KeywordTok&, const KeywordTok&) { return true; }

  // Prints the fixed label "keyword".
  friend std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
    os << "keyword";
    return os;
  }
};

// A lexed token: the slice of input it covers (`span`) together with its
// kind-specific payload (`data`).
struct Token {
  using Data = std::variant<LParenTok,
                            RParenTok,
                            IntTok,
                            FloatTok,
                            IdTok,
                            StringTok,
                            KeywordTok>;

  std::string_view span;
  Data data;

  // Tokens are equal when their spans compare equal and they hold the same
  // alternative with equal payloads.
  [[maybe_unused]] friend bool operator==(const Token& t1, const Token& t2) {
    if (t1.span != t2.span) {
      return false;
    }
    const auto samePayload = [](auto& d1, auto& d2) {
      if constexpr (std::is_same_v<decltype(d1), decltype(d2)>) {
        return d1 == d2;
      } else {
        // Different alternatives never compare equal.
        return false;
      }
    };
    return std::visit(samePayload, t1.data, t2.data);
  }

  // Prints the payload followed by the span in double quotes.
  // Suppress clang-tidy false positive about unused functions.
  [[maybe_unused]] friend std::ostream& operator<<(std::ostream& os,
                                                   const Token& tok) {
    std::visit([&os](const auto& payload) { os << payload; }, tok.data);
    os << " \"" << tok.span << "\"";
    return os;
  }
};

struct TextPos {
size_t line;
size_t col;
return pos;
}

// Positions are equal when both line and column match.
bool operator==(const TextPos& other) const {
  if (line != other.line) {
    return false;
  }
  return col == other.col;
}
// Negation of operator==.
bool operator!=(const TextPos& other) const {
  const bool same = (*this == other);
  return !same;
}
// Positions are equal when both line and column match.
bool TextPos::operator==(const TextPos& other) const {
  if (line != other.line) {
    return false;
  }
  return col == other.col;
}

// Prints the position as "line:col".
// Suppress clang-tidy false positive about unused functions.
[[maybe_unused]] friend std::ostream& operator<<(std::ostream& os,
                                                 const TextPos& pos) {
  os << pos.line;
  os << ":";
  os << pos.col;
  return os;
}
};
// Integer tokens are equal when both the value and the signedness tag match.
bool IntTok::operator==(const IntTok& other) const {
  if (n != other.n) {
    return false;
  }
  return signedness == other.signedness;
}

// Lexer's purpose is twofold. First, it wraps a buffer to provide a tokenizing
// iterator over it. Second, it implements that iterator itself. Also provides
// utilities for locating the text position of tokens within the buffer. Text
// positions are computed on demand rather than eagerly because they are
// typically only needed when there is an error to report.
struct Lexer {
using iterator = Lexer;
using difference_type = std::ptrdiff_t;
using value_type = Token;
using pointer = const Token*;
using reference = const Token&;
using iterator_category = std::forward_iterator_tag;

std::string_view buffer;
size_t index = 0;
std::optional<Token> curr;

// The end sentinel: a default-constructed Lexer has an empty buffer, index 0
// and no current token, which is what `end()` returns and what `operator==`
// treats as "past the last token".
Lexer() = default;

// Wrap `buffer` and prime the iterator so that it already refers to the first
// token. The three calls are order-dependent: skip whatever `space` matches
// at the start of the buffer (presumably whitespace/comments), lex the first
// token into `curr`, then skip what follows it so `index` rests at the start
// of the next token. The constructor is not `explicit`, so a string_view
// converts implicitly to a Lexer.
Lexer(std::string_view buffer) : buffer(buffer) {
skipSpace();
lexToken();
skipSpace();
}
// Float tokens are equal when their sign bits agree and either the values
// compare equal or both are NaN with equal (possibly absent) payloads. The
// sign-bit check distinguishes +0 from -0, and the NaN clause is needed
// because NaN never compares equal with `==`.
bool FloatTok::operator==(const FloatTok& other) const {
  if (std::signbit(d) != std::signbit(other.d)) {
    return false;
  }
  if (d == other.d) {
    return true;
  }
  return std::isnan(d) && std::isnan(other.d) &&
         nanPayload == other.nanPayload;
}

// The part of the buffer that has not been consumed yet, i.e. everything
// from `index` onwards.
std::string_view next() const {
  return buffer.substr(index);
}
// Tokens are equal when their spans compare equal and they hold the same
// alternative with equal payloads.
bool Token::operator==(const Token& other) const {
  if (span != other.span) {
    return false;
  }
  const auto samePayload = [](auto& t1, auto& t2) {
    if constexpr (std::is_same_v<decltype(t1), decltype(t2)>) {
      return t1 == t2;
    } else {
      // Different alternatives never compare equal.
      return false;
    }
  };
  return std::visit(samePayload, data, other.data);
}

// Advance `index` past whatever `space` matches at the current position; do
// nothing when it matches nothing.
void skipSpace() {
  const auto skipped = space(next());
  if (!skipped) {
    return;
  }
  index += skipped->span.size();
}
// Prints the position as "line:col".
std::ostream& operator<<(std::ostream& os, const TextPos& pos) {
  os << pos.line;
  os << ":";
  os << pos.col;
  return os;
}

void lexToken() {
// TODO: Ensure we're getting the longest possible match.
Token tok;
if (auto t = lparen(next())) {
tok = Token{t->span, LParenTok{}};
} else if (auto t = rparen(next())) {
tok = Token{t->span, RParenTok{}};
} else if (auto t = ident(next())) {
tok = Token{t->span, IdTok{}};
} else if (auto t = integer(next())) {
tok = Token{t->span, IntTok{t->n, t->signedness}};
} else if (auto t = float_(next())) {
tok = Token{t->span, FloatTok{t->nanPayload, t->d}};
} else if (auto t = str(next())) {
tok = Token{t->span, StringTok{t->str}};
} else if (auto t = keyword(next())) {
tok = Token{t->span, KeywordTok{}};
} else {
// TODO: Do something about lexing errors.
curr = std::nullopt;
return;
}
index += tok.span.size();
curr = {tok};
}
// Prints the token as a single-quoted paren: '('
std::ostream& operator<<(std::ostream& os, const LParenTok&) {
  os << "'('";
  return os;
}

// Pre-increment: lex the next token into `curr` (or clear `curr` if nothing
// can be lexed), then skip whatever `space` matches after it so that `index`
// rests at the start of the following token. Returns this lexer.
Lexer& operator++() {
// Preincrement
lexToken();
skipSpace();
return *this;
}
// Prints the token as a single-quoted paren: ')'
std::ostream& operator<<(std::ostream& os, const RParenTok&) {
  os << "')'";
  return os;
}

// Post-increment: advance this lexer by one token and return a copy of the
// state it had before advancing.
Lexer operator++(int) {
  Lexer previous(*this);
  operator++();
  return previous;
}
// Prints the fixed label "id".
std::ostream& operator<<(std::ostream& os, const IdTok&) {
  os << "id";
  return os;
}

// Access the current token. Must not be called when there is no current
// token (`curr` is empty), e.g. on the end sentinel.
const Token& operator*() {
  return *curr;
}
// Member access to the current token. Must not be called when there is no
// current token (`curr` is empty), e.g. on the end sentinel.
const Token* operator->() {
  const Token& current = *curr;
  return &current;
}
// Prints the value followed by " signed" or " unsigned".
std::ostream& operator<<(std::ostream& os, const IntTok& tok) {
  os << tok.n;
  if (tok.signedness == Signed) {
    os << " signed";
  } else {
    os << " unsigned";
  }
  return os;
}

bool operator==(const Lexer& other) const {
// The iterator is equal to the end sentinel when there is no current token.
if (!curr && !other.curr) {
return true;
std::ostream& operator<<(std::ostream& os, const FloatTok& tok) {
if (std::isnan(tok.d)) {
os << (std::signbit(tok.d) ? "+" : "-");
if (tok.nanPayload) {
return os << "nan:0x" << std::hex << *tok.nanPayload << std::dec;
}
// Otherwise they are equivalent when they are at the same position.
return index == other.index;
return os << "nan";
}
return os << tok.d;
}

// Negation of operator==.
bool operator!=(const Lexer& other) const {
  const bool same = (*this == other);
  return !same;
}

// The lexer is its own iterator: begin() is a copy of the current state.
Lexer begin() {
  return *this;
}

// The end iterator is a default-constructed lexer, which has no current
// token.
Lexer end() {
  return Lexer();
}

TextPos position(const char* c) {
assert(size_t(c - buffer.data()) < buffer.size());
TextPos pos{1, 0};
for (const char* p = buffer.data(); p != c; ++p) {
if (*p == '\n') {
pos.line++;
pos.col = 0;
} else {
pos.col++;
}
}
return pos;
// Prints the string contents in double quotes when the token holds a string,
// and the placeholder "(raw string)" when it does not.
std::ostream& operator<<(std::ostream& os, const StringTok& tok) {
  if (!tok.str) {
    return os << "(raw string)";
  }
  return os << '"' << *tok.str << '"';
}

// Text position of the first character of `span`; forwards to the
// pointer-based overload.
TextPos position(std::string_view span) {
  const char* start = span.data();
  return position(start);
}

// Text position of the start of `tok`, found through its span.
TextPos position(Token tok) {
  return position(tok.span);
}
};
// Prints the fixed label "keyword".
std::ostream& operator<<(std::ostream& os, const KeywordTok&) {
  os << "keyword";
  return os;
}

} // anonymous namespace
// Prints the token's payload followed by its span in double quotes.
std::ostream& operator<<(std::ostream& os, const Token& tok) {
  std::visit([&os](const auto& payload) { os << payload; }, tok.data);
  os << " \"" << tok.span << "\"";
  return os;
}

} // namespace wasm::WATParser
Loading