From 03c8e4b465c8cffd2596d2741b29ad2ba4ec1765 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 14 Jul 2022 14:33:12 +0200 Subject: [PATCH 01/21] lexer: rewrite token scanner - Use nested switches instead of lookup tables to detect tokens - Simplify input buffer logic - Reduce amount of intermediate states Signed-off-by: Jo-Philipp Wich --- include/ucode/lexer.h | 23 +- lexer.c | 1464 ++++++++--------- .../17_hang_on_unclosed_expression_block | 2 +- .../04_bugs/18_hang_on_line_comments_at_eof | 2 +- 4 files changed, 710 insertions(+), 781 deletions(-) diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h index 835bc2b6..dbec129c 100644 --- a/include/ucode/lexer.h +++ b/include/ucode/lexer.h @@ -124,14 +124,11 @@ typedef enum { typedef enum { UC_LEX_IDENTIFY_BLOCK, - UC_LEX_BLOCK_COMMENT_START, - UC_LEX_BLOCK_EXPRESSION_START, UC_LEX_BLOCK_EXPRESSION_EMIT_TAG, - UC_LEX_BLOCK_STATEMENT_START, UC_LEX_BLOCK_COMMENT, UC_LEX_IDENTIFY_TOKEN, - UC_LEX_PARSE_TOKEN, - UC_LEX_PLACEHOLDER, + UC_LEX_PLACEHOLDER_START, + UC_LEX_PLACEHOLDER_END, UC_LEX_EOF } uc_lex_state_t; @@ -145,19 +142,9 @@ typedef struct { uc_lex_state_t state; uc_parse_config_t *config; uc_source_t *source; - uint8_t eof:1; - uint8_t is_escape:1; - uint8_t is_placeholder:1; uint8_t no_regexp:1; uint8_t no_keyword:1; - size_t buflen; - char *buf, *bufstart, *bufend; - size_t lookbehindlen; - char *lookbehind; - const void *tok; uc_token_t curr; - char esc[5]; - uint8_t esclen; int lead_surrogate; size_t lastoff; enum { @@ -176,6 +163,12 @@ typedef struct { size_t count; size_t *entries; } templates; + struct { + size_t count; + char *entries; + } buffer; + unsigned char *rbuf; + size_t rlen, rpos; } uc_lexer_t; diff --git a/lexer.c b/lexer.c index 5be8eced..574c0516 100644 --- a/lexer.c +++ b/lexer.c @@ -29,24 +29,12 @@ #include "ucode/lib.h" #include "ucode/lexer.h" -#define UC_LEX_CONTINUE_PARSING (void *)1 - struct keyword { unsigned type; const char *pat; unsigned plen; }; -struct token { - 
unsigned type; - union { - uint32_t patn; - char pat[4]; - } u; - unsigned plen; - uc_token_t *(*parse)(uc_lexer_t *); -}; - #define dec(o) \ ((o) - '0') @@ -56,94 +44,6 @@ struct token { #ifndef NO_COMPILE -static uc_token_t *parse_comment(uc_lexer_t *); -static uc_token_t *parse_string(uc_lexer_t *); -static uc_token_t *parse_regexp(uc_lexer_t *); -static uc_token_t *parse_number(uc_lexer_t *); -static uc_token_t *parse_label(uc_lexer_t *); - -static const struct token tokens[] = { - { TK_ASLEFT, { .pat = "<<=" }, 3, NULL }, - { TK_ASRIGHT, { .pat = ">>=" }, 3, NULL }, - { TK_LEXP, { .pat = "{{-" }, 3, NULL }, - { TK_REXP, { .pat = "-}}" }, 3, NULL }, - { TK_LSTM, { .pat = "{%+" }, 3, NULL }, - { TK_LSTM, { .pat = "{%-" }, 3, NULL }, - { TK_RSTM, { .pat = "-%}" }, 3, NULL }, - { TK_EQS, { .pat = "===" }, 3, NULL }, - { TK_NES, { .pat = "!==" }, 3, NULL }, - { TK_ELLIP, { .pat = "..." }, 3, NULL }, - { TK_QLBRACK, { .pat = "?.[" }, 3, NULL }, - { TK_QLPAREN, { .pat = "?.(" }, 3, NULL }, - { TK_ASEXP, { .pat = "**=" }, 3, NULL }, - { TK_ASAND, { .pat = "&&=" }, 3, NULL }, - { TK_ASOR, { .pat = "||=" }, 3, NULL }, - { TK_ASNULLISH, { .pat = "\?\?=" }, 3, NULL }, - { TK_AND, { .pat = "&&" }, 2, NULL }, - { TK_ASADD, { .pat = "+=" }, 2, NULL }, - { TK_ASBAND, { .pat = "&=" }, 2, NULL }, - { TK_ASBOR, { .pat = "|=" }, 2, NULL }, - { TK_ASBXOR, { .pat = "^=" }, 2, NULL }, - //{ TK_ASDIV, { .pat = "/=" }, 2, NULL }, - { TK_ASMOD, { .pat = "%=" }, 2, NULL }, - { TK_ASMUL, { .pat = "*=" }, 2, NULL }, - { TK_ASSUB, { .pat = "-=" }, 2, NULL }, - { TK_EXP, { .pat = "**" }, 2, NULL }, - { TK_DEC, { .pat = "--" }, 2, NULL }, - { TK_INC, { .pat = "++" }, 2, NULL }, - { TK_EQ, { .pat = "==" }, 2, NULL }, - { TK_NE, { .pat = "!=" }, 2, NULL }, - { TK_LE, { .pat = "<=" }, 2, NULL }, - { TK_GE, { .pat = ">=" }, 2, NULL }, - { TK_LSHIFT, { .pat = "<<" }, 2, NULL }, - { TK_RSHIFT, { .pat = ">>" }, 2, NULL }, - { 0, { .pat = "//" }, 2, parse_comment }, - { 0, { .pat = "/*" }, 2, 
parse_comment }, - { TK_OR, { .pat = "||" }, 2, NULL }, - { TK_LEXP, { .pat = "{{" }, 2, NULL }, - { TK_REXP, { .pat = "}}" }, 2, NULL }, - { TK_LSTM, { .pat = "{%" }, 2, NULL }, - { TK_RSTM, { .pat = "%}" }, 2, NULL }, - { TK_ARROW, { .pat = "=>" }, 2, NULL }, - { TK_NULLISH, { .pat = "??" }, 2, NULL }, - { TK_QDOT, { .pat = "?." }, 2, NULL }, - { TK_PLACEH, { .pat = "${" }, 2, NULL }, - { TK_ADD, { .pat = "+" }, 1, NULL }, - { TK_ASSIGN, { .pat = "=" }, 1, NULL }, - { TK_BAND, { .pat = "&" }, 1, NULL }, - { TK_BOR, { .pat = "|" }, 1, NULL }, - { TK_LBRACK, { .pat = "[" }, 1, NULL }, - { TK_RBRACK, { .pat = "]" }, 1, NULL }, - { TK_BXOR, { .pat = "^" }, 1, NULL }, - { TK_LBRACE, { .pat = "{" }, 1, NULL }, - { TK_RBRACE, { .pat = "}" }, 1, NULL }, - { TK_COLON, { .pat = ":" }, 1, NULL }, - { TK_COMMA, { .pat = "," }, 1, NULL }, - { TK_COMPL, { .pat = "~" }, 1, NULL }, - //{ TK_DIV, { .pat = "/" }, 1, NULL }, - { TK_GT, { .pat = ">" }, 1, NULL }, - { TK_NOT, { .pat = "!" }, 1, NULL }, - { TK_LT, { .pat = "<" }, 1, NULL }, - { TK_MOD, { .pat = "%" }, 1, NULL }, - { TK_MUL, { .pat = "*" }, 1, NULL }, - { TK_LPAREN, { .pat = "(" }, 1, NULL }, - { TK_RPAREN, { .pat = ")" }, 1, NULL }, - { TK_QMARK, { .pat = "?" }, 1, NULL }, - { TK_SCOL, { .pat = ";" }, 1, NULL }, - { TK_SUB, { .pat = "-" }, 1, NULL }, - { TK_DOT, { .pat = "." 
}, 1, NULL }, - { TK_STRING, { .pat = "'" }, 1, parse_string }, - { TK_STRING, { .pat = "\"" }, 1, parse_string }, - { TK_REGEXP, { .pat = "/" }, 1, parse_regexp }, - { TK_LABEL, { .pat = "_" }, 1, parse_label }, - { TK_LABEL, { .pat = "az" }, 0, parse_label }, - { TK_LABEL, { .pat = "AZ" }, 0, parse_label }, - { TK_NUMBER, { .pat = "09" }, 0, parse_number }, - - /* NB: this must be last for simple retrieval */ - { TK_TEMPLATE, { .pat = "`" }, 1, parse_string } -}; - static const struct keyword reserved_words[] = { { TK_ENDFUNC, "endfunction", 11 }, { TK_CONTINUE, "continue", 8 }, @@ -174,119 +74,118 @@ static const struct keyword reserved_words[] = { }; -/* length of the longest token in our lookup table */ -#define UC_LEX_MAX_TOKEN_LEN 3 +static int +fill_buf(uc_lexer_t *lex) { + lex->rbuf = xrealloc(lex->rbuf, 128); + lex->rlen = fread(lex->rbuf, 1, 128, lex->source->fp); + lex->rpos = 0; -static uc_token_t * -emit_op(uc_lexer_t *lex, uint32_t pos, int type, uc_value_t *uv) -{ - lex->curr.type = type; - lex->curr.uv = uv; - lex->curr.pos = pos; + if (!lex->rlen) + return EOF; - return &lex->curr; -} + lex->rpos++; -static void lookbehind_append(uc_lexer_t *lex, const char *data, size_t len) -{ - if (len) { - lex->lookbehind = xrealloc(lex->lookbehind, lex->lookbehindlen + len); - memcpy(lex->lookbehind + lex->lookbehindlen, data, len); - lex->lookbehindlen += len; - } + return (int)lex->rbuf[0]; } -static void lookbehind_reset(uc_lexer_t *lex) { - free(lex->lookbehind); - lex->lookbehind = NULL; - lex->lookbehindlen = 0; -} +static int +update_line(uc_lexer_t *lex, int ch) { + if (ch == '\n' || ch == EOF) + uc_source_line_next(lex->source); + else + uc_source_line_update(lex->source, 1); -static uc_token_t * -lookbehind_to_text(uc_lexer_t *lex, uint32_t pos, int type, const char *strip_trailing_chars) { - uc_token_t *rv = NULL; + lex->source->off++; - if (lex->lookbehind) { - if (strip_trailing_chars) { - while (lex->lookbehindlen > 0 && 
strchr(strip_trailing_chars, lex->lookbehind[lex->lookbehindlen-1])) - lex->lookbehindlen--; - } + return ch; +} - rv = emit_op(lex, pos, type, ucv_string_new_length(lex->lookbehind, lex->lookbehindlen)); +static int +lookahead_char(uc_lexer_t *lex) { + int c; - lookbehind_reset(lex); - } + if (lex->rpos < lex->rlen) + return (int)lex->rbuf[lex->rpos]; - return rv; -} + c = fill_buf(lex); + lex->rpos = 0; -static inline size_t -buf_remaining(uc_lexer_t *lex) { - return (lex->bufend - lex->bufstart); + return c; } -static inline bool -_buf_startswith(uc_lexer_t *lex, const char *str, size_t len) { - return (buf_remaining(lex) >= len && !strncmp(lex->bufstart, str, len)); -} +static bool +check_char(uc_lexer_t *lex, int ch) { + if (lookahead_char(lex) != ch) + return false; -#define buf_startswith(s, str) _buf_startswith(s, str, sizeof(str) - 1) + lex->rpos++; + update_line(lex, ch); -static void -buf_consume(uc_lexer_t *lex, size_t len) { - size_t i, linelen; + return true; +} - for (i = 0, linelen = 0; i < len; i++) { - if (lex->bufstart[i] == '\n') { - uc_source_line_update(lex->source, linelen); - uc_source_line_next(lex->source); +static int +next_char(uc_lexer_t *lex) { + int ch = (lex->rpos < lex->rlen) ? 
(int)lex->rbuf[lex->rpos++] : fill_buf(lex); - linelen = 0; - } - else { - linelen++; - } - } + return update_line(lex, ch); +} + +static uc_token_t * +emit_op(uc_lexer_t *lex, ssize_t pos, int type, uc_value_t *uv) +{ + lex->curr.type = type; + lex->curr.uv = uv; - if (linelen) - uc_source_line_update(lex->source, linelen); + if (pos < 0) + lex->curr.pos = lex->source->off + pos; + else + lex->curr.pos = (size_t)pos; - lex->bufstart += len; - lex->source->off += len; + return &lex->curr; } static uc_token_t * -parse_comment(uc_lexer_t *lex) -{ - const struct token *tok = lex->tok; - const char *ptr, *end; - size_t elen; +emit_buffer(uc_lexer_t *lex, ssize_t pos, int type, const char *strip_trailing_chars) { + uc_token_t *rv = NULL; + + if (lex->buffer.count) { + if (strip_trailing_chars) + while (lex->buffer.count > 0 && strchr(strip_trailing_chars, *uc_vector_last(&lex->buffer))) + lex->buffer.count--; + + rv = emit_op(lex, pos, type, ucv_string_new_length(uc_vector_first(&lex->buffer), lex->buffer.count)); - if (!strcmp(tok->u.pat, "//")) { - end = "\n"; - elen = 1; + uc_vector_clear(&lex->buffer); } - else { - end = "*/"; - elen = 2; + else if (type != TK_TEXT) { + rv = emit_op(lex, pos, type, ucv_string_new_length("", 0)); } - for (ptr = lex->bufstart; ptr < lex->bufend - elen; ptr++) { - if (!strncmp(ptr, end, elen)) { - buf_consume(lex, (ptr - lex->bufstart) + elen); + return rv; +} - return UC_LEX_CONTINUE_PARSING; - } - } - buf_consume(lex, ptr - lex->bufstart); +static uc_token_t * +parse_comment(uc_lexer_t *lex, int kind) +{ + int ch; + + while (true) { + ch = next_char(lex); - if (lex->eof) { - lex->state = UC_LEX_EOF; + if (kind == '/' && (ch == '\n' || ch == EOF)) + break; + + if (kind == '*' && ch == '*' && check_char(lex, '/')) + break; + + if (ch == EOF) { + lex->state = UC_LEX_EOF; - if (elen == 2) return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated comment")); + } } return NULL; @@ -301,238 +200,157 @@ append_utf8(uc_lexer_t 
*lex, int code) { rem = sizeof(ustr); if (utf8enc(&up, &rem, code)) - lookbehind_append(lex, ustr, up - ustr); + for (up = ustr; rem < (int)sizeof(ustr); rem++) + uc_vector_push(&lex->buffer, *up++); } static uc_token_t * -parse_string(uc_lexer_t *lex) +parse_string(uc_lexer_t *lex, int kind) { - const struct token *tok = lex->tok; - char q = tok->u.pat[0]; - char *ptr, *c; - uc_token_t *rv; - int code; + int code, ch, i; + unsigned type; + size_t off; - if (!buf_remaining(lex)) - return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated string")); + if (kind == '`') + type = TK_TEMPLATE; + else if (kind == '/') + type = TK_REGEXP; + else + type = TK_STRING; - for (ptr = lex->bufstart; ptr < lex->bufend; ptr++) { - /* continuation of placeholder start */ - if (lex->is_placeholder) { - if (*ptr == '{') { - buf_consume(lex, 1); - rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL); + off = lex->source->off - 1; - if (!rv) - rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0)); + for (ch = next_char(lex); ch != EOF; ch = next_char(lex)) { + switch (ch) { + /* placeholder */ + case '$': + if (type == TK_TEMPLATE && check_char(lex, '{')) { + lex->state = UC_LEX_PLACEHOLDER_START; - return rv; + return emit_buffer(lex, off, type, NULL); } - lex->is_placeholder = false; - lookbehind_append(lex, "$", 1); - } + uc_vector_push(&lex->buffer, '$'); + break; - /* continuation of escape sequence */ - if (lex->is_escape) { - if (lex->esclen == 0) { - /* non-unicode escape following a lead surrogate, emit replacement... */ - if (lex->lead_surrogate && *ptr != 'u') { - append_utf8(lex, 0xFFFD); - lex->lead_surrogate = 0; - } + /* escape sequence */ + case '\\': + /* unicode escape sequence */ + if (type != TK_REGEXP && check_char(lex, 'u')) { + for (i = 0, code = 0; i < 4; i++) { + ch = next_char(lex); - switch ((q == '/') ? 
0 : *ptr) { - case 'u': - case 'x': - lex->esc[lex->esclen++] = *ptr; - break; + if (!isxdigit(ch)) + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - lex->esc[lex->esclen++] = 'o'; - lex->esc[lex->esclen++] = *ptr; - break; + code = code * 16 + hex(ch); + } - default: - lex->is_escape = false; - c = strchr("a\ab\be\033f\fn\nr\rt\tv\v", *ptr); + /* is a leading surrogate value */ + if ((code & 0xFC00) == 0xD800) { + /* found a subsequent leading surrogate, ignore and emit replacement char for previous one */ + if (lex->lead_surrogate) + append_utf8(lex, 0xFFFD); - if (c && *c >= 'a') { - lookbehind_append(lex, c + 1, 1); + /* store surrogate value and advance to next escape sequence */ + lex->lead_surrogate = code; + } + + /* is a trailing surrogate value */ + else if ((code & 0xFC00) == 0xDC00) { + /* found a trailing surrogate following a leading one, combine and encode */ + if (lex->lead_surrogate) { + code = 0x10000 + ((lex->lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); + lex->lead_surrogate = 0; } - else { - /* regex mode => retain backslash */ - if (q == '/') - lookbehind_append(lex, "\\", 1); - lookbehind_append(lex, ptr, 1); + /* trailing surrogate not following a leading one, ignore and use replacement char */ + else { + code = 0xFFFD; } - buf_consume(lex, (ptr + 1) - lex->bufstart); + append_utf8(lex, code); + } - break; + /* is a normal codepoint */ + else { + append_utf8(lex, code); } } - else { - switch (lex->esc[0]) { - case 'u': - if (lex->esclen < 5) { - if (!isxdigit(*ptr)) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - lex->esc[lex->esclen++] = *ptr; - } - - if (lex->esclen == 5) { - code = hex(lex->esc[1]) * 16 * 16 * 16 + - hex(lex->esc[2]) * 16 * 16 + - hex(lex->esc[3]) * 16 + - hex(lex->esc[4]); - - /* is a leading surrogate value */ - if ((code & 
0xFC00) == 0xD800) { - /* found a subsequent leading surrogate, ignore and emit replacement char for previous one */ - if (lex->lead_surrogate) - append_utf8(lex, 0xFFFD); - - /* store surrogate value and advance to next escape sequence */ - lex->lead_surrogate = code; - } - - /* is a trailing surrogate value */ - else if ((code & 0xFC00) == 0xDC00) { - /* found a trailing surrogate following a leading one, combine and encode */ - if (lex->lead_surrogate) { - code = 0x10000 + ((lex->lead_surrogate & 0x3FF) << 10) + (code & 0x3FF); - lex->lead_surrogate = 0; - } - - /* trailing surrogate not following a leading one, ignore and use replacement char */ - else { - code = 0xFFFD; - } - - append_utf8(lex, code); - } - - /* is a normal codepoint */ - else { - append_utf8(lex, code); - } - - lex->esclen = 0; - lex->is_escape = false; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } + /* hex escape sequence */ + else if (type != TK_REGEXP && check_char(lex, 'x')) { + for (i = 0, code = 0; i < 2; i++) { + ch = next_char(lex); - break; + if (!isxdigit(ch)) + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Invalid escape sequence")); - case 'x': - if (lex->esclen < 3) { - if (!isxdigit(*ptr)) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); + code = code * 16 + hex(ch); + } - lex->esc[lex->esclen++] = *ptr; - } + append_utf8(lex, code); + } - if (lex->esclen == 3) { - append_utf8(lex, hex(lex->esc[1]) * 16 + hex(lex->esc[2])); + /* octal or letter */ + else { + /* try to parse octal sequence... 
*/ + for (i = 0, code = 0, ch = lookahead_char(lex); + kind != '/' && i < 3 && ch >= '0' && ch <= '7'; + i++, next_char(lex), ch = lookahead_char(lex)) { + code = code * 8 + dec(ch); + } - lex->esclen = 0; - lex->is_escape = false; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } + if (i) { + if (code > 255) + return emit_op(lex, -3, TK_ERROR, ucv_string_new("Invalid escape sequence")); - break; + append_utf8(lex, code); + } - case 'o': - if (lex->esclen < 4) { - /* found a non-octal char */ - if (*ptr < '0' || *ptr > '7') { - /* pad sequence to three chars */ - switch (lex->esclen) { - case 3: - lex->esc[3] = lex->esc[2]; - lex->esc[2] = lex->esc[1]; - lex->esc[1] = '0'; - break; - - case 2: - lex->esc[3] = lex->esc[1]; - lex->esc[2] = '0'; - lex->esc[1] = '0'; - break; - } - - lex->esclen = 4; - buf_consume(lex, ptr-- - lex->bufstart); - } - - /* append */ - else { - lex->esc[lex->esclen++] = *ptr; - buf_consume(lex, (ptr + 1) - lex->bufstart); - } - } + /* ... no octal sequence, handle other escape */ + else { + ch = next_char(lex); - if (lex->esclen == 4) { - code = dec(lex->esc[1]) * 8 * 8 + - dec(lex->esc[2]) * 8 + - dec(lex->esc[3]); + switch (ch) { + case 'a': uc_vector_push(&lex->buffer, '\a'); break; + case 'b': uc_vector_push(&lex->buffer, '\b'); break; + case 'e': uc_vector_push(&lex->buffer, '\033'); break; + case 'f': uc_vector_push(&lex->buffer, '\f'); break; + case 'n': uc_vector_push(&lex->buffer, '\n'); break; + case 'r': uc_vector_push(&lex->buffer, '\r'); break; + case 't': uc_vector_push(&lex->buffer, '\t'); break; + case 'v': uc_vector_push(&lex->buffer, '\v'); break; - if (code > 255) - return emit_op(lex, lex->source->off + lex->esclen + 1, TK_ERROR, ucv_string_new("Invalid escape sequence")); + case EOF: + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unterminated string")); - append_utf8(lex, code); + default: + /* regex mode => retain backslash */ + if (type == TK_REGEXP) + uc_vector_push(&lex->buffer, '\\'); - lex->esclen = 0; - 
lex->is_escape = false; + uc_vector_push(&lex->buffer, ch); } - - break; } } - } - - /* terminating char */ - else if (*ptr == q) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 1) - lex->bufstart); - - rv = lookbehind_to_text(lex, lex->lastoff, tok->type, NULL); - if (!rv) - rv = emit_op(lex, lex->lastoff, tok->type, ucv_string_new_length("", 0)); - - return rv; - } + break; - /* escape sequence start */ - else if (*ptr == '\\') { - lex->is_escape = true; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr - lex->bufstart) + 1); - } + /* other character */ + default: + /* terminating delimitter */ + if (ch == kind) + return emit_buffer(lex, off, type, NULL); - /* potential placeholder start */ - else if (q == '`' && *ptr == '$') { - lex->is_placeholder = true; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr - lex->bufstart) + 1); + uc_vector_push(&lex->buffer, ch); } } - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + // FIXME + lex->state = UC_LEX_EOF; - return NULL; + return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated string")); } @@ -563,76 +381,31 @@ parse_regexp(uc_lexer_t *lex) size_t len; char *s; - switch (lex->esc[0]) { - case UC_LEX_PARSE_REGEX_INIT: - if (lex->no_regexp) { - if (buf_startswith(lex, "=")) { - buf_consume(lex, 1); - - return emit_op(lex, lex->source->off, TK_ASDIV, NULL); - } - - return emit_op(lex, lex->source->off, TK_DIV, NULL); - } - - lex->esc[0] = UC_LEX_PARSE_REGEX_PATTERN; - break; - - case UC_LEX_PARSE_REGEX_PATTERN: - rv = parse_string(lex); - - if (rv && rv->type == TK_ERROR) - return rv; + rv = parse_string(lex, '/'); - if (rv != NULL && rv != UC_LEX_CONTINUE_PARSING) { - lex->lookbehind = (char *)rv; - lex->esc[0] = UC_LEX_PARSE_REGEX_FLAGS; - } - - break; - - case UC_LEX_PARSE_REGEX_FLAGS: - rv = (uc_token_t *)lex->lookbehind; - - while 
(lex->bufstart < lex->bufend || lex->eof) { - switch (lex->eof ? EOF : lex->bufstart[0]) { - case 'g': - buf_consume(lex, 1); + if (rv->type == TK_REGEXP) { + while (true) { + if (check_char(lex, 'g')) is_reg_global = true; - break; - - case 'i': - buf_consume(lex, 1); + else if (check_char(lex, 'i')) is_reg_icase = true; - break; - - case 's': - buf_consume(lex, 1); + else if (check_char(lex, 's')) is_reg_newline = true; + else break; - - default: - lex->lookbehind = NULL; - - len = xasprintf(&s, "%c%*s", - (is_reg_global << 0) | (is_reg_icase << 1) | (is_reg_newline << 2), - ucv_string_length(rv->uv), - ucv_string_get(rv->uv)); - - ucv_free(rv->uv, false); - rv->uv = ucv_string_new_length(s, len); - free(s); - - rv->type = TK_REGEXP; - - return rv; - } } - break; + len = xasprintf(&s, "%c%*s", + (is_reg_global << 0) | (is_reg_icase << 1) | (is_reg_newline << 2), + ucv_string_length(rv->uv), + ucv_string_get(rv->uv)); + + ucv_free(rv->uv, false); + rv->uv = ucv_string_new_length(s, len); + free(s); } - return NULL; + return rv; } @@ -647,37 +420,34 @@ parse_regexp(uc_lexer_t *lex) */ static uc_token_t * -parse_label(uc_lexer_t *lex) +parse_label(uc_lexer_t *lex, int ch) { - const struct token *tok = lex->tok; const struct keyword *word; - char *ptr; - size_t i; - - if (!lex->lookbehind && tok->plen) - lookbehind_append(lex, tok->u.pat, tok->plen); + size_t i, len; - if (!buf_remaining(lex) || (lex->bufstart[0] != '_' && !isalnum(lex->bufstart[0]))) { - if (lex->no_keyword == false) { - for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { - if (lex->lookbehind && lex->lookbehindlen == word->plen && !strncmp(lex->lookbehind, word->pat, word->plen)) { - lookbehind_reset(lex); + while (true) { + uc_vector_push(&lex->buffer, ch); + ch = lookahead_char(lex); - return emit_op(lex, lex->source->off - word->plen, word->type, NULL); - } - } - } + if (!isalnum(ch) && ch != '_') + break; - return lookbehind_to_text(lex, 
lex->source->off - lex->lookbehindlen, TK_LABEL, NULL); + next_char(lex); } - for (ptr = lex->bufstart; ptr < lex->bufend && (*ptr == '_' || isalnum(*ptr)); ptr++) - ; + len = lex->buffer.count; - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + if (!lex->no_keyword) { + for (i = 0, word = &reserved_words[0]; i < ARRAY_SIZE(reserved_words); i++, word = &reserved_words[i]) { + if (lex->buffer.count == word->plen && !strncmp(uc_vector_first(&lex->buffer), word->pat, word->plen)) { + uc_vector_clear(&lex->buffer); - return NULL; + return emit_op(lex, -len, word->type, NULL); + } + } + } + + return emit_buffer(lex, -len, TK_LABEL, NULL); } @@ -694,7 +464,7 @@ parse_label(uc_lexer_t *lex) static inline bool is_numeric_char(uc_lexer_t *lex, char c) { - char prev = lex->lookbehindlen ? lex->lookbehind[lex->lookbehindlen-1] : 0; + char prev = lex->buffer.count ? *uc_vector_last(&lex->buffer) : 0; switch (c|32) { case '.': @@ -731,380 +501,507 @@ is_numeric_char(uc_lexer_t *lex, char c) } static uc_token_t * -parse_number(uc_lexer_t *lex) +parse_number(uc_lexer_t *lex, int ch) { - uc_token_t *rv = NULL; uc_value_t *nv = NULL; - const char *ptr; + size_t len; char *e; - if (!buf_remaining(lex) || !is_numeric_char(lex, lex->bufstart[0])) { - lookbehind_append(lex, "\0", 1); - - nv = uc_number_parse_octal(lex->lookbehind, &e); + while (true) { + uc_vector_push(&lex->buffer, ch); + ch = lookahead_char(lex); - switch (ucv_type(nv)) { - case UC_DOUBLE: - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_DOUBLE, nv); + if (!is_numeric_char(lex, ch)) break; - case UC_INTEGER: - rv = emit_op(lex, lex->source->off - (e - lex->lookbehind), TK_NUMBER, nv); - break; + next_char(lex); + } - default: - rv = emit_op(lex, lex->source->off - (lex->lookbehindlen - (e - lex->lookbehind) - 1), TK_ERROR, ucv_string_new("Invalid number literal")); - } + len = lex->buffer.count; - lookbehind_reset(lex); + 
uc_vector_push(&lex->buffer, '\0'); - return rv; - } + nv = uc_number_parse_octal(uc_vector_first(&lex->buffer), &e); - for (ptr = lex->bufstart; ptr < lex->bufend && is_numeric_char(lex, *ptr); ptr++) - ; + uc_vector_clear(&lex->buffer); - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); + switch (ucv_type(nv)) { + case UC_DOUBLE: + return emit_op(lex, -len, TK_DOUBLE, nv); - return NULL; + case UC_INTEGER: + return emit_op(lex, -len, TK_NUMBER, nv); + + default: + return emit_op(lex, -len, TK_ERROR, ucv_string_new("Invalid number literal")); + } } static uc_token_t * -lex_step(uc_lexer_t *lex, FILE *fp) +lex_find_token(uc_lexer_t *lex) { - uint32_t masks[] = { 0, le32toh(0x000000ff), le32toh(0x0000ffff), le32toh(0x00ffffff), le32toh(0xffffffff) }; - union { uint32_t n; char str[4]; } search; - const struct token *tok; - size_t rlen, rem, *nest; - char *ptr, c; - uc_token_t *rv; - size_t i; + bool tpl = !(lex->config && lex->config->raw_mode); + int ch = next_char(lex); + + while (isspace(ch)) + ch = next_char(lex); + + switch (ch) { + case '~': + return emit_op(lex, -1, TK_COMPL, NULL); - /* only less than UC_LEX_MAX_TOKEN_LEN unread buffer chars remaining, - * move the remaining bytes to the beginning and read more data */ - if (buf_remaining(lex) < UC_LEX_MAX_TOKEN_LEN) { - if (!lex->buf) { - lex->buflen = 128; - lex->buf = xalloc(lex->buflen); + case '}': + if (tpl && check_char(lex, '}')) + return emit_op(lex, -2, TK_REXP, NULL); + + return emit_op(lex, -1, TK_RBRACE, NULL); + + case '|': + if (check_char(lex, '|')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASOR, NULL); + + return emit_op(lex, -2, TK_OR, NULL); } - rem = lex->bufend - lex->bufstart; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBOR, NULL); - if (rem) - memcpy(lex->buf, lex->bufstart, rem); + return emit_op(lex, -1, TK_BOR, NULL); - rlen = fread(lex->buf + rem, 1, lex->buflen - rem, fp); + case '{': + if (tpl && 
check_char(lex, '{')) + return emit_op(lex, -2, TK_LEXP, NULL); - lex->bufstart = lex->buf; - lex->bufend = lex->buf + rlen + rem; + if (tpl && check_char(lex, '%')) + return emit_op(lex, -2, TK_LSTM, NULL); - if (rlen == 0 && (ferror(fp) || feof(fp))) - lex->eof = 1; - } + return emit_op(lex, -1, TK_LBRACE, NULL); - switch (lex->state) { - case UC_LEX_IDENTIFY_BLOCK: - /* previous block had strip trailing whitespace flag, skip leading whitespace */ - if (lex->modifier == MINUS) { - while (buf_remaining(lex) && isspace(lex->bufstart[0])) - buf_consume(lex, 1); + case '^': + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBXOR, NULL); - lex->modifier = UNSPEC; + return emit_op(lex, -1, TK_BXOR, NULL); + + case '[': + return emit_op(lex, -1, TK_LBRACK, NULL); + + case ']': + return emit_op(lex, -1, TK_RBRACK, NULL); + + case '?': + if (check_char(lex, '?')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASNULLISH, NULL); + + return emit_op(lex, -2, TK_NULLISH, NULL); } - /* previous block was a statement block and trim_blocks is enabld, skip leading newline */ - else if (lex->modifier == NEWLINE) { - if (buf_startswith(lex, "\n")) - buf_consume(lex, 1); + if (check_char(lex, '.')) { + if (check_char(lex, '[')) + return emit_op(lex, -3, TK_QLBRACK, NULL); + + if (check_char(lex, '(')) + return emit_op(lex, -3, TK_QLPAREN, NULL); - lex->modifier = UNSPEC; + return emit_op(lex, -2, TK_QDOT, NULL); } - /* scan forward through buffer to identify start token */ - for (ptr = lex->bufstart; ptr < lex->bufend - strlen("{#"); ptr++) { - /* found start of comment block */ - if (!strncmp(ptr, "{#", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_COMMENT_START; + return emit_op(lex, lex->source->off, TK_QMARK, NULL); - return NULL; - } + case '>': + if (check_char(lex, '>')) { + if (check_char(lex, '=')) + return emit_op(lex, 
-3, TK_ASRIGHT, NULL); - /* found start of expression block */ - else if (!strncmp(ptr, "{{", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_EXPRESSION_START; + return emit_op(lex, -2, TK_RSHIFT, NULL); + } - return NULL; - } + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_GE, NULL); - /* found start of statement block */ - else if (!strncmp(ptr, "{%", 2)) { - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, (ptr + 2) - lex->bufstart); - lex->lastoff = lex->source->off - 2; - lex->state = UC_LEX_BLOCK_STATEMENT_START; + return emit_op(lex, -1, TK_GT, NULL); - return NULL; - } + case '=': + if (check_char(lex, '=')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_EQS, NULL); + + return emit_op(lex, -2, TK_EQ, NULL); } - /* we're at eof */ - if (lex->eof) { - lookbehind_append(lex, ptr, lex->bufend - ptr); - lex->state = UC_LEX_EOF; + if (check_char(lex, '>')) + return emit_op(lex, -2, TK_ARROW, NULL); + + return emit_op(lex, -1, TK_ASSIGN, NULL); + + case '<': + if (check_char(lex, '<')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASLEFT, NULL); - return lookbehind_to_text(lex, lex->lastoff, TK_TEXT, NULL); + return emit_op(lex, -2, TK_LSHIFT, NULL); } - lookbehind_append(lex, lex->bufstart, ptr - lex->bufstart); - buf_consume(lex, ptr - lex->bufstart); - break; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_LE, NULL); + + return emit_op(lex, -1, TK_LT, NULL); + + case ';': + return emit_op(lex, -1, TK_SCOL, NULL); + case ':': + return emit_op(lex, -1, TK_COLON, NULL); - case UC_LEX_BLOCK_COMMENT_START: - case UC_LEX_BLOCK_EXPRESSION_START: - case UC_LEX_BLOCK_STATEMENT_START: - rv = NULL; - lex->modifier = UNSPEC; + case '/': + ch = lookahead_char(lex); + lex->lastoff = lex->source->off - 1; - /* strip whitespace before block */ - if (buf_startswith(lex, 
"-")) { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, " \n\t\v\f\r"); - buf_consume(lex, 1); + if (ch == '/' || ch == '*') + return parse_comment(lex, ch); + + if (lex->no_regexp) { + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASDIV, NULL); + + return emit_op(lex, -1, TK_DIV, NULL); } - /* disable lstrip flag (only valid for statement blocks) */ - else if (lex->state == UC_LEX_BLOCK_STATEMENT_START) { - /* disable lstrip flag */ - if (buf_startswith(lex, "+")) { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); - buf_consume(lex, 1); - } + return parse_regexp(lex); - /* put out text leading up to the opening tag and potentially - * strip trailing white space from it depending on the global - * block lstrip setting */ - else { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, - (lex->config && lex->config->lstrip_blocks) ? " \t\v\f\r" : NULL); + case '.': + if (check_char(lex, '.')) { + if (check_char(lex, '.')) + return emit_op(lex, -3, TK_ELLIP, NULL); + + /* The sequence ".." 
cannot be a valid */ + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unexpected character")); + } + + return emit_op(lex, -1, TK_DOT, NULL); + + case '-': + if (tpl && check_char(lex, '}')) { + if (check_char(lex, '}')) { + lex->modifier = MINUS; + + return emit_op(lex, -3, TK_REXP, NULL); } + + /* The sequence "-}" cannot be a valid */ + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); } - else { - rv = lookbehind_to_text(lex, lex->source->off, TK_TEXT, NULL); + + if (tpl && check_char(lex, '%')) { + if (check_char(lex, '}')) { + lex->modifier = MINUS; + + return emit_op(lex, -3, TK_RSTM, NULL); + } + + /* The sequence "-%" cannot be a valid */ + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); } - switch (lex->state) { - case UC_LEX_BLOCK_COMMENT_START: - lex->state = UC_LEX_BLOCK_COMMENT; - lex->block = COMMENT; - break; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASSUB, NULL); - case UC_LEX_BLOCK_STATEMENT_START: - lex->state = UC_LEX_IDENTIFY_TOKEN; - lex->block = STATEMENTS; - break; + if (check_char(lex, '-')) + return emit_op(lex, -2, TK_DEC, NULL); - case UC_LEX_BLOCK_EXPRESSION_START: - lex->state = UC_LEX_BLOCK_EXPRESSION_EMIT_TAG; - break; + return emit_op(lex, -1, TK_SUB, NULL); - default: - break; + case ',': + return emit_op(lex, -1, TK_COMMA, NULL); + + case '+': + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASADD, NULL); + + if (check_char(lex, '+')) + return emit_op(lex, -2, TK_INC, NULL); + + return emit_op(lex, -1, TK_ADD, NULL); + + case '*': + if (check_char(lex, '*')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASEXP, NULL); + + return emit_op(lex, -2, TK_EXP, NULL); } - return rv; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASMUL, NULL); + return emit_op(lex, -1, TK_MUL, NULL); - case UC_LEX_BLOCK_COMMENT: - /* scan forward through buffer to identify end token */ - while (lex->bufstart < lex->bufend - 2) { - if (buf_startswith(lex, 
"-#}")) { - lex->state = UC_LEX_IDENTIFY_BLOCK; - lex->modifier = MINUS; - buf_consume(lex, 3); - lex->lastoff = lex->source->off; - break; - } - else if (buf_startswith(lex, "#}")) { - lex->state = UC_LEX_IDENTIFY_BLOCK; - buf_consume(lex, 2); - lex->lastoff = lex->source->off; - break; - } + case '(': + return emit_op(lex, -1, TK_LPAREN, NULL); + + case ')': + return emit_op(lex, -1, TK_RPAREN, NULL); - buf_consume(lex, 1); + case '\'': + case '"': + case '`': + lex->lastoff = lex->source->off - 1; + + return parse_string(lex, ch); + + case '&': + if (check_char(lex, '&')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_ASAND, NULL); + + return emit_op(lex, -2, TK_AND, NULL); } - /* we're at eof */ - if (lex->eof) { - lex->state = UC_LEX_EOF; + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASBAND, NULL); + + return emit_op(lex, -1, TK_BAND, NULL); - buf_consume(lex, lex->bufend - lex->bufstart); + case '%': + if (tpl && check_char(lex, '}')) + return emit_op(lex, -2, TK_RSTM, NULL); - return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated template block")); + if (check_char(lex, '=')) + return emit_op(lex, -2, TK_ASMOD, NULL); + + return emit_op(lex, -1, TK_MOD, NULL); + + case '!': + if (check_char(lex, '=')) { + if (check_char(lex, '=')) + return emit_op(lex, -3, TK_NES, NULL); + + return emit_op(lex, -2, TK_NE, NULL); } - break; + return emit_op(lex, -1, TK_NOT, NULL); + case EOF: + return emit_op(lex, -1, TK_EOF, NULL); - case UC_LEX_BLOCK_EXPRESSION_EMIT_TAG: - lex->state = UC_LEX_IDENTIFY_TOKEN; - lex->block = EXPRESSION; + default: + if (isalpha(ch) || ch == '_') + return parse_label(lex, ch); - return emit_op(lex, lex->source->off, TK_LEXP, NULL); + if (isdigit(ch)) + return parse_number(lex, ch); + return emit_op(lex, -1, TK_ERROR, ucv_string_new("Unexpected character")); + } +} - case UC_LEX_IDENTIFY_TOKEN: - /* skip leading whitespace */ - for (i = 0; i < buf_remaining(lex) && isspace(lex->bufstart[i]); i++) - ; 
+static uc_token_t * +lex_step(uc_lexer_t *lex) +{ + const char *strip = NULL; + uc_token_t *tok; + size_t *nest; + int ch; - buf_consume(lex, i); + while (lex->state != UC_LEX_EOF) { + switch (lex->state) { + case UC_LEX_IDENTIFY_BLOCK: + ch = next_char(lex); - if (i > 0 && buf_remaining(lex) < UC_LEX_MAX_TOKEN_LEN) - return NULL; + /* previous block had strip trailing whitespace flag, skip leading whitespace */ + if (lex->modifier == MINUS) { + while (isspace(ch)) + ch = next_char(lex); - for (i = 0; i < sizeof(search.str); i++) - search.str[i] = (i < buf_remaining(lex)) ? lex->bufstart[i] : 0; + lex->modifier = UNSPEC; + } - for (i = 0, tok = tokens; i < ARRAY_SIZE(tokens); tok = &tokens[++i]) { - /* remaining buffer data is shorter than token, skip */ - if (tok->plen > buf_remaining(lex)) - continue; + /* previous block was a statement block and trim_blocks is enabled, skip leading newline */ + else if (lex->modifier == NEWLINE) { + if (ch == '\n') + ch = next_char(lex); - c = buf_remaining(lex) ? lex->bufstart[0] : 0; + lex->modifier = UNSPEC; + } - if (tok->plen ? 
((search.n & masks[tok->plen]) == tok->u.patn) - : (c >= tok->u.pat[0] && c <= tok->u.pat[1])) { - lex->lastoff = lex->source->off; + /* scan forward through buffer to identify block start token */ + while (ch != EOF) { + if (ch == '{') { + ch = next_char(lex); - /* token has a parse method, switch state */ - if (tok->parse) { - lex->tok = tok; - lex->state = UC_LEX_PARSE_TOKEN; + switch (ch) { + /* found start of comment block */ + case '#': + lex->state = UC_LEX_BLOCK_COMMENT; + lex->block = COMMENT; - buf_consume(lex, tok->plen); + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; - return NULL; - } + break; - /* in raw code mode, ignore template tag tokens */ - if (lex->config && lex->config->raw_mode && - (tok->type == TK_LSTM || tok->type == TK_RSTM || - tok->type == TK_LEXP || tok->type == TK_REXP)) { - continue; - } + /* found start of expression block */ + case '{': + lex->state = UC_LEX_BLOCK_EXPRESSION_EMIT_TAG; - /* disallow nesting blocks */ - if (tok->type == TK_LSTM || tok->type == TK_LEXP) { - buf_consume(lex, tok->plen); + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; - return emit_op(lex, lex->source->off - tok->plen, TK_ERROR, ucv_string_new("Template blocks may not be nested")); - } + break; - /* found end of block */ - else if ((lex->block == STATEMENTS && tok->type == TK_RSTM) || - (lex->block == EXPRESSION && tok->type == TK_REXP)) { - /* strip whitespace after block */ - if (tok->u.pat[0] == '-') - lex->modifier = MINUS; + /* found start of statement block */ + case '%': + lex->state = UC_LEX_IDENTIFY_TOKEN; + lex->block = STATEMENTS; - /* strip newline after statement block */ - else if (lex->block == STATEMENTS && - lex->config && lex->config->trim_blocks) - lex->modifier = NEWLINE; + if (check_char(lex, '-')) + strip = " \n\t\v\f\r"; + else if (check_char(lex, '+')) + strip = NULL; + else if (lex->config && lex->config->lstrip_blocks) + strip = " \t\v\f\r"; - lex->state = UC_LEX_IDENTIFY_BLOCK; - lex->block = NONE; - } + break; + + 
default: + /* not a start tag, remember char and move on */ + uc_vector_push(&lex->buffer, '{'); + continue; + } - /* track opening braces */ - else if (tok->type == TK_LBRACE && lex->templates.count > 0) { - nest = uc_vector_last(&lex->templates); - (*nest)++; + break; } - /* check end of placeholder expression */ - else if (tok->type == TK_RBRACE && lex->templates.count > 0) { - nest = uc_vector_last(&lex->templates); + uc_vector_push(&lex->buffer, ch); + ch = next_char(lex); + } - if (*nest == 0) { - lex->templates.count--; - lex->state = UC_LEX_PARSE_TOKEN; - lex->tok = &tokens[ARRAY_SIZE(tokens) - 1]; /* NB: TK_TEMPLATE token spec */ - } - else { - (*nest)--; - } + if (ch == EOF) + lex->state = UC_LEX_EOF; + + /* push out leading text */ + tok = emit_buffer(lex, lex->lastoff, TK_TEXT, strip); + lex->lastoff = lex->source->off - 2; + + if (!tok) + continue; + + return tok; + + + case UC_LEX_BLOCK_COMMENT: + ch = next_char(lex); + + /* scan forward through buffer to identify end token */ + while (ch != EOF) { + if (ch == '-' && check_char(lex, '#') && check_char(lex, '}')) { + lex->modifier = MINUS; + break; } - /* do not report statement tags to the parser */ - if (tok->type != 0 && tok->type != TK_LSTM) - rv = emit_op(lex, lex->source->off, - (tok->type == TK_RSTM) ? 
TK_SCOL : tok->type, NULL); - else - rv = NULL; + if (ch == '#' && check_char(lex, '}')) + break; + + ch = next_char(lex); + } - buf_consume(lex, tok->plen); + if (ch == EOF) { + lex->state = UC_LEX_EOF; - return rv; + return emit_op(lex, lex->lastoff, TK_ERROR, ucv_string_new("Unterminated template block")); } - } - /* no possible return beyond this point can advance, - mark lex state as eof */ - lex->state = UC_LEX_EOF; + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; - /* no token matched and we do have remaining data, junk */ - if (buf_remaining(lex)) - return emit_op(lex, lex->source->off, TK_ERROR, ucv_string_new("Unexpected character")); + continue; - /* we're at eof, allow unclosed statement blocks */ - if (lex->block == STATEMENTS) - return NULL; - /* premature EOF */ - return emit_op(lex, lex->source->off, TK_ERROR, ucv_string_new("Unterminated template block")); + case UC_LEX_BLOCK_EXPRESSION_EMIT_TAG: + lex->state = UC_LEX_IDENTIFY_TOKEN; + lex->block = EXPRESSION; + return emit_op(lex, lex->source->off, TK_LEXP, NULL); - case UC_LEX_PARSE_TOKEN: - tok = lex->tok; - rv = tok->parse(lex); - if (rv) { - memset(lex->esc, 0, sizeof(lex->esc)); - lex->state = lex->is_placeholder ? UC_LEX_PLACEHOLDER : UC_LEX_IDENTIFY_TOKEN; - lex->is_placeholder = false; - lex->tok = NULL; + case UC_LEX_IDENTIFY_TOKEN: + do { tok = lex_find_token(lex); } while (tok == NULL); - if (rv == UC_LEX_CONTINUE_PARSING) - rv = NULL; + /* disallow nesting blocks */ + if (tok->type == TK_LSTM || tok->type == TK_LEXP) + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Template blocks may not be nested")); - return rv; - } + /* found end of statement block */ + if (lex->block == STATEMENTS && tok->type == TK_RSTM) { + /* strip newline after statement block? 
*/ + if (lex->modifier == UNSPEC && lex->config && lex->config->trim_blocks) + lex->modifier = NEWLINE; - break; + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; + lex->block = NONE; + tok = emit_op(lex, -2, TK_SCOL, NULL); + } - case UC_LEX_PLACEHOLDER: - lex->state = UC_LEX_IDENTIFY_TOKEN; + /* found end of expression block */ + else if (lex->block == EXPRESSION && tok->type == TK_REXP) { + lex->lastoff = lex->source->off; + lex->state = UC_LEX_IDENTIFY_BLOCK; + lex->block = NONE; + } + + /* track opening braces */ + else if (tok->type == TK_LBRACE && lex->templates.count > 0) { + nest = uc_vector_last(&lex->templates); + (*nest)++; + } + + /* check end of placeholder expression */ + else if (tok->type == TK_RBRACE && lex->templates.count > 0) { + nest = uc_vector_last(&lex->templates); + + if (*nest == 0) { + lex->templates.count--; + lex->state = UC_LEX_PLACEHOLDER_END; + } + else { + (*nest)--; + } + } + + /* premature EOF? */ + else if (tok->type == TK_EOF && lex->block != STATEMENTS) { + lex->state = UC_LEX_EOF; + + return emit_op(lex, -2, TK_ERROR, ucv_string_new("Unterminated template block")); + } - uc_vector_push(&lex->templates, 0); + return tok; - return emit_op(lex, lex->source->off, TK_PLACEH, NULL); + case UC_LEX_PLACEHOLDER_START: + lex->state = UC_LEX_IDENTIFY_TOKEN; + + uc_vector_push(&lex->templates, 0); + + return emit_op(lex, -2, TK_PLACEH, NULL); - case UC_LEX_EOF: - break; + + case UC_LEX_PLACEHOLDER_END: + lex->state = UC_LEX_IDENTIFY_TOKEN; + + return parse_string(lex, '`'); + + + case UC_LEX_EOF: + break; + } } - return NULL; + return emit_op(lex, lex->source->off, TK_EOF, NULL); } void @@ -1115,24 +1012,15 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) lex->config = config; lex->source = uc_source_get(source); - lex->eof = 0; - lex->is_escape = 0; - lex->block = NONE; lex->modifier = UNSPEC; - lex->buflen = 0; - lex->buf = NULL; - lex->bufstart = NULL; - lex->bufend = NULL; - - 
lex->lookbehindlen = 0; - lex->lookbehind = NULL; + lex->rlen = 0; + lex->rpos = 0; + lex->rbuf = NULL; - lex->tok = NULL; - - lex->esclen = 0; - memset(lex->esc, 0, sizeof(lex->esc)); + lex->buffer.count = 0; + lex->buffer.entries = NULL; lex->lead_surrogate = 0; @@ -1150,11 +1038,12 @@ uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source) void uc_lexer_free(uc_lexer_t *lex) { + uc_vector_clear(&lex->buffer); uc_vector_clear(&lex->templates); + uc_source_put(lex->source); - free(lex->lookbehind); - free(lex->buf); + free(lex->rbuf); } uc_token_t * @@ -1162,47 +1051,94 @@ uc_lexer_next_token(uc_lexer_t *lex) { uc_token_t *rv = NULL; - while (lex->state != UC_LEX_EOF) { - rv = lex_step(lex, lex->source->fp); - - if (rv != NULL) - break; - } - - if (rv) { - lex->no_keyword = false; - lex->no_regexp = false; + rv = lex_step(lex); - return rv; - } + lex->no_keyword = false; + lex->no_regexp = false; - return emit_op(lex, lex->source->off, TK_EOF, NULL); + return rv; } const char * uc_tokenname(unsigned type) { static char buf[sizeof("'endfunction'")]; - size_t i; - - switch (type) { - case 0: return "End of file"; - case TK_TEMPLATE: return "Template"; - case TK_STRING: return "String"; - case TK_LABEL: return "Label"; - case TK_NUMBER: return "Number"; - case TK_DOUBLE: return "Double"; - case TK_REGEXP: return "Regexp"; - } - - for (i = 0; i < ARRAY_SIZE(tokens); i++) { - if (tokens[i].type != type) - continue; + const char *tokennames[] = { + [TK_LEXP] = "'{{'", + [TK_REXP] = "'}}'", + [TK_LSTM] = "'{%'", + [TK_RSTM] = "'%}'", + [TK_COMMA] = "','", + [TK_ASSIGN] = "'='", + [TK_ASADD] = "'+='", + [TK_ASSUB] = "'-='", + [TK_ASMUL] = "'*='", + [TK_ASDIV] = "'/='", + [TK_ASMOD] = "'%='", + [TK_ASLEFT] = "'<<='", + [TK_ASRIGHT] = "'>>='", + [TK_ASBAND] = "'&='", + [TK_ASBXOR] = "'^='", + [TK_ASBOR] = "'|='", + [TK_QMARK] = "'?'", + [TK_COLON] = "':'", + [TK_OR] = "'||'", + [TK_AND] = "'&&'", + [TK_BOR] = "'|'", + [TK_BXOR] = "'^'", + [TK_BAND] = 
"'&'", + [TK_EQS] = "'==='", + [TK_NES] = "'!=='", + [TK_EQ] = "'=='", + [TK_NE] = "'!='", + [TK_LT] = "'<'", + [TK_LE] = "'<='", + [TK_GT] = "'>'", + [TK_GE] = "'>='", + [TK_LSHIFT] = "'<<'", + [TK_RSHIFT] = "'>>'", + [TK_ADD] = "'+'", + [TK_SUB] = "'-'", + [TK_MUL] = "'*'", + [TK_DIV] = "'/'", + [TK_MOD] = "'%'", + [TK_EXP] = "'**'", + [TK_NOT] = "'!'", + [TK_COMPL] = "'~'", + [TK_INC] = "'++'", + [TK_DEC] = "'--'", + [TK_DOT] = "'.'", + [TK_LBRACK] = "'['", + [TK_RBRACK] = "']'", + [TK_LPAREN] = "'('", + [TK_RPAREN] = "')'", + [TK_LBRACE] = "'{'", + [TK_RBRACE] = "'}'", + [TK_SCOL] = "';'", + [TK_ELLIP] = "'...'", + [TK_ARROW] = "'=>'", + [TK_QLBRACK] = "'?.['", + [TK_QLPAREN] = "'?.('", + [TK_QDOT] = "'?.'", + [TK_ASEXP] = "'**='", + [TK_ASAND] = "'&&='", + [TK_ASOR] = "'||='", + [TK_ASNULLISH] = "'\?\?='", + [TK_NULLISH] = "'\?\?'", + [TK_PLACEH] = "'${'", + + [TK_TEXT] = "Text", + [TK_LABEL] = "Label", + [TK_NUMBER] = "Number", + [TK_DOUBLE] = "Double", + [TK_STRING] = "String", + [TK_REGEXP] = "Regexp", + [TK_TEMPLATE] = "Template", + [TK_ERROR] = "Error", + [TK_EOF] = "End of file", + }; - snprintf(buf, sizeof(buf), "'%s'", tokens[i].u.pat); - - return buf; - } + size_t i; for (i = 0; i < ARRAY_SIZE(reserved_words); i++) { if (reserved_words[i].type != type) @@ -1213,7 +1149,7 @@ uc_tokenname(unsigned type) return buf; } - return "?"; + return tokennames[type] ? tokennames[type] : "?"; } bool diff --git a/tests/custom/04_bugs/17_hang_on_unclosed_expression_block b/tests/custom/04_bugs/17_hang_on_unclosed_expression_block index 25128bb6..29553abe 100644 --- a/tests/custom/04_bugs/17_hang_on_unclosed_expression_block +++ b/tests/custom/04_bugs/17_hang_on_unclosed_expression_block @@ -3,7 +3,7 @@ infinite loop. 
-- Expect stderr -- Syntax error: Unterminated template block -In line 1, byte 6: +In line 1, byte 5: `{{ 1` ^-- Near here diff --git a/tests/custom/04_bugs/18_hang_on_line_comments_at_eof b/tests/custom/04_bugs/18_hang_on_line_comments_at_eof index 957ed473..5fc811e8 100644 --- a/tests/custom/04_bugs/18_hang_on_line_comments_at_eof +++ b/tests/custom/04_bugs/18_hang_on_line_comments_at_eof @@ -2,7 +2,7 @@ When parsing a comment near EOF, or a comment escaping the end of an expression block, the lexer did end up in an infinite loop. -- Expect stderr -- -Syntax error: Expecting expression +Syntax error: Unterminated template block In line 1, byte 9: `{{ // }}` From b738f3adbe76fb4bd446f1de1f0ece71cf6b78e8 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 10:20:53 +0200 Subject: [PATCH 02/21] lexer: recognize module related keywords Add support for the `import`, `export`, `from` and `as` keywords used in module import and export statements. Signed-off-by: Jo-Philipp Wich --- include/ucode/lexer.h | 4 ++++ lexer.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h index dbec129c..e3aba8ed 100644 --- a/include/ucode/lexer.h +++ b/include/ucode/lexer.h @@ -117,6 +117,10 @@ typedef enum { TK_NULLISH, TK_PLACEH, TK_TEMPLATE, + TK_IMPORT, + TK_EXPORT, + TK_FROM, + TK_AS, TK_EOF, TK_ERROR diff --git a/lexer.c b/lexer.c index 574c0516..7c7788a4 100644 --- a/lexer.c +++ b/lexer.c @@ -54,6 +54,8 @@ static const struct keyword reserved_words[] = { { TK_RETURN, "return", 6 }, { TK_ENDFOR, "endfor", 6 }, { TK_SWITCH, "switch", 6 }, + { TK_IMPORT, "import", 6 }, + { TK_EXPORT, "export", 6 }, { TK_ENDIF, "endif", 5 }, { TK_WHILE, "while", 5 }, { TK_BREAK, "break", 5 }, @@ -66,11 +68,13 @@ static const struct keyword reserved_words[] = { { TK_THIS, "this", 4 }, { TK_NULL, "null", 4 }, { TK_CASE, "case", 4 }, + { TK_FROM, "from", 4 }, { TK_TRY, "try", 3 }, { TK_FOR, "for", 3 }, { TK_LOCAL, "let", 3 }, { TK_IF, 
"if", 2 }, { TK_IN, "in", 2 }, + { TK_AS, "as", 2 }, }; From 3a6f9cbc8c1356ccf309f3bb8bdd5b895fb2e01a Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 10:44:19 +0200 Subject: [PATCH 03/21] types: add ability to mark array and object values as constant The upcoming module import support requires constant object values to implement module wildcard import. Reuse the existing u64 bit in ucv heads to mark array or object values as constant and add corresponding `ucv_is_constant()` and `ucv_set_constant()` helpers. Signed-off-by: Jo-Philipp Wich --- include/ucode/types.h | 25 +++++++++++++++++++++++-- types.c | 16 ++++++++-------- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/ucode/types.h b/include/ucode/types.h index d1e01a19..314dbc82 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -47,7 +47,7 @@ typedef enum uc_type { typedef struct uc_value { uint32_t type:4; uint32_t mark:1; - uint32_t u64:1; + uint32_t u64_or_constant:1; uint32_t refcount:26; } uc_value_t; @@ -448,7 +448,28 @@ ucv_is_arrowfn(uc_value_t *uv) static inline bool ucv_is_u64(uc_value_t *uv) { - return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64 == true); + return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant == true && + uv->type == UC_INTEGER); +} + +static inline bool +ucv_is_constant(uc_value_t *uv) +{ + return (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant == true && + (uv->type == UC_ARRAY || uv->type == UC_OBJECT)); +} + +static inline bool +ucv_set_constant(uc_value_t *uv, bool constant) +{ + if (((uintptr_t)uv & 3) == 0 && uv != NULL && uv->u64_or_constant != constant && + (uv->type == UC_ARRAY || uv->type == UC_OBJECT)) { + uv->u64_or_constant = constant; + + return true; + } + + return false; } static inline bool diff --git a/types.c b/types.c index 4f5088c9..9700d8fc 100644 --- a/types.c +++ b/types.c @@ -470,7 +470,7 @@ ucv_int64_new(int64_t n) integer = xalloc(sizeof(*integer)); 
integer->header.type = UC_INTEGER; integer->header.refcount = 1; - integer->header.u64 = 0; + integer->header.u64_or_constant = 0; integer->i.s64 = n; return &integer->header; @@ -492,7 +492,7 @@ ucv_uint64_new(uint64_t n) integer = xalloc(sizeof(*integer)); integer->header.type = UC_INTEGER; integer->header.refcount = 1; - integer->header.u64 = 1; + integer->header.u64_or_constant = 1; integer->i.u64 = n; return &integer->header; @@ -520,7 +520,7 @@ ucv_uint64_get(uc_value_t *uv) case UC_INTEGER: integer = (uc_integer_t *)uv; - if (integer->header.u64) + if (integer->header.u64_or_constant) return integer->i.u64; if (integer->i.s64 >= 0) @@ -574,10 +574,10 @@ ucv_int64_get(uc_value_t *uv) case UC_INTEGER: integer = (uc_integer_t *)uv; - if (integer->header.u64 && integer->i.u64 <= (uint64_t)INT64_MAX) + if (integer->header.u64_or_constant && integer->i.u64 <= (uint64_t)INT64_MAX) return (int64_t)integer->i.u64; - if (!integer->header.u64) + if (!integer->header.u64_or_constant) return integer->i.s64; errno = ERANGE; @@ -715,7 +715,7 @@ ucv_array_push(uc_value_t *uv, uc_value_t *item) { uc_array_t *array = (uc_array_t *)uv; - if (ucv_type(uv) != UC_ARRAY) + if (ucv_type(uv) != UC_ARRAY || uv->u64_or_constant) return NULL; ucv_array_set(uv, array->count, item); @@ -899,7 +899,7 @@ ucv_object_add(uc_value_t *uv, const char *key, uc_value_t *val) unsigned long hash; void *k; - if (ucv_type(uv) != UC_OBJECT) + if (ucv_type(uv) != UC_OBJECT || uv->u64_or_constant) return false; hash = lh_get_hash(object->table, (const void *)key); @@ -932,7 +932,7 @@ ucv_object_delete(uc_value_t *uv, const char *key) { uc_object_t *object = (uc_object_t *)uv; - if (ucv_type(uv) != UC_OBJECT) + if (ucv_type(uv) != UC_OBJECT || uv->u64_or_constant) return false; return (lh_table_delete(object->table, key) == 0); From 3c104f5312604a6a1a0dd80528cc937159bc57ef Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Fri, 29 Jul 2022 12:27:09 +0200 Subject: [PATCH 04/21] types: resolve upvalue 
references on stringification When stringifying upvalue references, resolve their target value and convert it to a string. Only yield the abstract string representation if the target value cannot be resolved. Signed-off-by: Jo-Philipp Wich --- types.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/types.c b/types.c index 9700d8fc..e7c9afa9 100644 --- a/types.c +++ b/types.c @@ -1487,6 +1487,7 @@ ucv_to_stringbuf_formatted(uc_vm_t *vm, uc_stringbuf_t *pb, uc_value_t *uv, size uc_closure_t *closure; uc_regexp_t *regexp; uc_value_t *argname; + uc_upvalref_t *ref; uc_array_t *array; size_t i, l; double d; @@ -1686,10 +1687,17 @@ ucv_to_stringbuf_formatted(uc_vm_t *vm, uc_stringbuf_t *pb, uc_value_t *uv, size break; case UC_UPVALUE: - ucv_stringbuf_printf(pb, "%s%s", - json ? "\"" : "", - uv, - json ? "\"" : ""); + ref = (uc_upvalref_t *)uv; + + if (ref->closed) + ucv_to_stringbuf_formatted(vm, pb, ref->value, depth, pad_char, pad_size); + else if (vm != NULL && ref->slot < vm->stack.count) + ucv_to_stringbuf_formatted(vm, pb, vm->stack.entries[ref->slot], depth, pad_char, pad_size); + else + ucv_stringbuf_printf(pb, "%s%s", + json ? "\"" : "", + uv, + json ? "\"" : ""); break; From 70ae3040fb384e7a77ef43ca6b426269f9acdcab Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 11:07:07 +0200 Subject: [PATCH 05/21] lib: honor constant flag of arrays Reject modifications on array values with a type exception when the constant flag is set on the array operated upon. 
Signed-off-by: Jo-Philipp Wich --- lib.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/lib.c b/lib.c index 4d37531c..0dee0734 100644 --- a/lib.c +++ b/lib.c @@ -369,6 +369,23 @@ uc_rindex(uc_vm_t *vm, size_t nargs) return uc_index(vm, nargs, true); } +static bool +assert_mutable_array(uc_vm_t *vm, uc_value_t *val) +{ + if (ucv_type(val) != UC_ARRAY) + return false; + + if (ucv_is_constant(val)) { + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "%s value is immutable", + ucv_typename(val)); + + return false; + } + + return true; +} + static uc_value_t * uc_push(uc_vm_t *vm, size_t nargs) { @@ -376,7 +393,7 @@ uc_push(uc_vm_t *vm, size_t nargs) uc_value_t *item = NULL; size_t arridx; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; for (arridx = 1; arridx < nargs; arridx++) { @@ -392,6 +409,9 @@ uc_pop(uc_vm_t *vm, size_t nargs) { uc_value_t *arr = uc_fn_arg(0); + if (!assert_mutable_array(vm, arr)) + return NULL; + return ucv_array_pop(arr); } @@ -400,6 +420,9 @@ uc_shift(uc_vm_t *vm, size_t nargs) { uc_value_t *arr = uc_fn_arg(0); + if (!assert_mutable_array(vm, arr)) + return NULL; + return ucv_array_shift(arr); } @@ -410,7 +433,7 @@ uc_unshift(uc_vm_t *vm, size_t nargs) uc_value_t *item = NULL; size_t i; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; for (i = 1; i < nargs; i++) { @@ -755,6 +778,9 @@ uc_reverse(uc_vm_t *vm, size_t nargs) char *dup, *p; if (ucv_type(obj) == UC_ARRAY) { + if (!assert_mutable_array(vm, obj)) + return NULL; + rv = ucv_array_new(vm); for (arridx = ucv_array_length(obj); arridx > 0; arridx--) @@ -851,7 +877,7 @@ uc_sort(uc_vm_t *vm, size_t nargs) uc_value_t *arr = uc_fn_arg(0); uc_value_t *fn = uc_fn_arg(1); - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; sort_ctx.vm = vm; @@ -870,7 +896,7 @@ uc_splice(uc_vm_t *vm, size_t nargs) int64_t remlen = ucv_to_integer(uc_fn_arg(2)); 
size_t arrlen, addlen, idx; - if (ucv_type(arr) != UC_ARRAY) + if (!assert_mutable_array(vm, arr)) return NULL; arrlen = ucv_array_length(arr); From 41114a02a38a65956010bab95c4bff19af7ac1ed Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 14:49:18 +0200 Subject: [PATCH 06/21] source: add tracking of exported symbols Extend abstract source objects to maintain a list of exported symbols and add functions to append and lookup exported names. Signed-off-by: Jo-Philipp Wich --- include/ucode/source.h | 3 +++ include/ucode/types.h | 2 ++ source.c | 25 +++++++++++++++++++++++++ types.c | 4 ++++ 4 files changed, 34 insertions(+) diff --git a/include/ucode/source.h b/include/ucode/source.h index 6f9a8d75..b3eaa343 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -54,4 +54,7 @@ void uc_source_line_update(uc_source_t *source, size_t off); void uc_source_runpath_set(uc_source_t *source, const char *runpath); +bool uc_source_export_add(uc_source_t *source, uc_value_t *name); +ssize_t uc_source_export_lookup(uc_source_t *source, uc_value_t *name); + #endif /* UCODE_SOURCE_H */ diff --git a/include/ucode/types.h b/include/ucode/types.h index 314dbc82..cde84f6a 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -65,6 +65,7 @@ typedef struct { /* Source buffer defintions */ uc_declare_vector(uc_lineinfo_t, uint8_t); +uc_declare_vector(uc_exports_t, uc_value_t *); typedef struct { uc_value_t header; @@ -72,6 +73,7 @@ typedef struct { FILE *fp; size_t off; uc_lineinfo_t lineinfo; + uc_exports_t exports; } uc_source_t; diff --git a/source.c b/source.c index b8f2e918..902ad4cf 100644 --- a/source.c +++ b/source.c @@ -196,3 +196,28 @@ uc_source_runpath_set(uc_source_t *source, const char *runpath) source->runpath = xstrdup(runpath); } + +bool +uc_source_export_add(uc_source_t *source, uc_value_t *name) +{ + ssize_t idx = uc_source_export_lookup(source, name); + + if (idx > -1) + return false; + + uc_vector_push(&source->exports, 
ucv_get(name)); + + return true; +} + +ssize_t +uc_source_export_lookup(uc_source_t *source, uc_value_t *name) +{ + size_t i; + + for (i = 0; i < source->exports.count; i++) + if (ucv_is_equal(source->exports.entries[i], name)) + return i; + + return -1; +} diff --git a/types.c b/types.c index e7c9afa9..2fba2078 100644 --- a/types.c +++ b/types.c @@ -292,7 +292,11 @@ ucv_free(uc_value_t *uv, bool retain) if (source->runpath != source->filename) free(source->runpath); + for (i = 0; i < source->exports.count; i++) + ucv_put(source->exports.entries[i]); + uc_vector_clear(&source->lineinfo); + uc_vector_clear(&source->exports); fclose(source->fp); free(source->buffer); break; From 9c9a9ec383608287802bb2639a4ee6b7bbfd6793 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 14 Jul 2022 23:15:11 +0200 Subject: [PATCH 07/21] program: fix en/decoding debuginfo upvalue slots in precompiled bytecode The sizeof(size_t) might differ from the sizeof(uint32_t) used to serialize compiled bytecode, so extra care is needed to properly encode and decode upvalue slot values which are defined as (size_t)-1 / 2 + n. 
Signed-off-by: Jo-Philipp Wich --- program.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/program.c b/program.c index 1810b064..bfe50bcc 100644 --- a/program.c +++ b/program.c @@ -224,7 +224,7 @@ enum { static void write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) { - size_t i; + size_t i, slot; /* write bytecode data */ write_vector(chunk, file); @@ -246,9 +246,14 @@ write_chunk(uc_chunk_t *chunk, FILE *file, uint32_t flags) write_u32(chunk->debuginfo.variables.count, file); for (i = 0; i < chunk->debuginfo.variables.count; i++) { + slot = chunk->debuginfo.variables.entries[i].slot; + + if (slot >= ((size_t)-1 / 2)) + slot = ((uint32_t)-1 / 2) + (slot - ((size_t)-1 / 2)); + write_u32(chunk->debuginfo.variables.entries[i].from, file); write_u32(chunk->debuginfo.variables.entries[i].to, file); - write_u32(chunk->debuginfo.variables.entries[i].slot, file); + write_u32(slot, file); write_u32(chunk->debuginfo.variables.entries[i].nameidx, file); } @@ -657,6 +662,9 @@ read_chunk(FILE *file, uc_chunk_t *chunk, uint32_t flags, const char *subj, char !read_size_t(file, &varrange->slot, sizeof(uint32_t), subjbuf, errp) || !read_size_t(file, &varrange->nameidx, sizeof(uint32_t), subjbuf, errp)) goto out; + + if (varrange->slot >= ((uint32_t)-1 / 2)) + varrange->slot = ((size_t)-1 / 2) + (varrange->slot - ((uint32_t)-1 / 2)); } snprintf(subjbuf, sizeof(subjbuf), "%s variable names", subj); From 23224687205076604c1e0e5829690a5b6e150c06 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 09:33:21 +0200 Subject: [PATCH 08/21] program: fix reporting source position of first instruction We must always report the chunk source position relative to the function start offset, even if it is zero. 
Signed-off-by: Jo-Philipp Wich --- program.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/program.c b/program.c index bfe50bcc..57252f8c 100644 --- a/program.c +++ b/program.c @@ -100,14 +100,10 @@ uc_program_function_load(uc_program_t *prog, size_t id) size_t uc_program_function_srcpos(uc_function_t *fn, size_t off) { - size_t pos; - if (!fn) return 0; - pos = uc_chunk_debug_get_srcpos(&fn->chunk, off); - - return pos ? fn->srcpos + pos : 0; + return fn->srcpos + uc_chunk_debug_get_srcpos(&fn->chunk, off); } void From c441f65c19e62eefaa8af37ae74ae47c4d4eaa4e Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 00:12:08 +0200 Subject: [PATCH 09/21] program: add infrastructure to handle multiple sources per program The upcoming module support requires maintaining multiple source objects within the same program, so add the necessary infrastructure for it. Signed-off-by: Jo-Philipp Wich --- compiler.c | 17 +++-- include/ucode/program.h | 5 +- include/ucode/types.h | 5 +- lib.c | 4 +- program.c | 160 +++++++++++++++++++++++++--------------- types.c | 10 ++- vm.c | 4 +- 7 files changed, 129 insertions(+), 76 deletions(-) diff --git a/compiler.c b/compiler.c index 67053c5d..d03ce5e2 100644 --- a/compiler.c +++ b/compiler.c @@ -121,7 +121,7 @@ uc_compiler_exprstack_is(uc_compiler_t *compiler, uc_exprflag_t flag) } static void -uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_program_t *program, bool strict) +uc_compiler_init(uc_compiler_t *compiler, const char *name, uc_source_t *source, size_t srcpos, uc_program_t *program, bool strict) { uc_value_t *varname = ucv_string_new("(callee)"); uc_function_t *fn; @@ -129,7 +129,7 @@ uc_compiler_init(uc_compiler_t *compiler, const char *name, size_t srcpos, uc_pr compiler->scope_depth = 0; compiler->program = program; - compiler->function = uc_program_function_new(program, name, srcpos); + compiler->function = uc_program_function_new(program, name, source, 
srcpos); compiler->locals.count = 0; compiler->locals.entries = NULL; @@ -163,7 +163,7 @@ uc_compiler_current_chunk(uc_compiler_t *compiler) static uc_source_t * uc_compiler_current_source(uc_compiler_t *compiler) { - return compiler->program->source; + return uc_program_function_source(compiler->function); } __attribute__((format(printf, 3, 0))) static void @@ -1110,7 +1110,8 @@ uc_compiler_compile_arrowfn(uc_compiler_t *compiler, uc_value_t *args, bool rest pos = compiler->parser->prev.pos; - uc_compiler_init(&fncompiler, NULL, compiler->parser->prev.pos, + uc_compiler_init(&fncompiler, NULL, uc_compiler_current_source(compiler), + compiler->parser->prev.pos, compiler->program, uc_compiler_is_strict(compiler)); @@ -1561,7 +1562,9 @@ uc_compiler_compile_function(uc_compiler_t *compiler) } uc_compiler_init(&fncompiler, - name ? ucv_string_get(name) : NULL, compiler->parser->prev.pos, + name ? ucv_string_get(name) : NULL, + uc_compiler_current_source(compiler), + compiler->parser->prev.pos, compiler->program, uc_compiler_is_strict(compiler)); @@ -2926,10 +2929,10 @@ uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **er uc_program_t *prog; uc_function_t *fn; - prog = uc_program_new(source); + prog = uc_program_new(); uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", 0, prog, + uc_compiler_init(&compiler, "main", source, 0, prog, config && config->strict_declarations); uc_compiler_parse_advance(&compiler); diff --git a/include/ucode/program.h b/include/ucode/program.h index e8b96eda..e5408077 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -20,7 +20,7 @@ #include "types.h" -uc_program_t *uc_program_new(uc_source_t *); +uc_program_t *uc_program_new(void); static inline uc_program_t * uc_program_get(uc_program_t *prog) { @@ -46,9 +46,10 @@ uc_program_put(uc_program_t *prog) { fn = fn##_tmp, \ fn##_tmp = (uc_function_t *)fn##_tmp->progref.prev) -uc_function_t 
*uc_program_function_new(uc_program_t *, const char *, size_t); +uc_function_t *uc_program_function_new(uc_program_t *, const char *, uc_source_t *, size_t); size_t uc_program_function_id(uc_program_t *, uc_function_t *); uc_function_t *uc_program_function_load(uc_program_t *, size_t); +uc_source_t *uc_program_function_source(uc_function_t *); size_t uc_program_function_srcpos(uc_function_t *, size_t); void uc_program_function_free(uc_function_t *); diff --git a/include/ucode/types.h b/include/ucode/types.h index cde84f6a..14f2a43b 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -115,6 +115,7 @@ typedef struct uc_function { bool arrow, vararg, strict; size_t nargs; size_t nupvals; + size_t srcidx; size_t srcpos; uc_chunk_t chunk; struct uc_program *program; @@ -204,11 +205,13 @@ uc_declare_vector(uc_resource_types_t, uc_resource_type_t *); /* Program structure definitions */ +uc_declare_vector(uc_sources_t, uc_source_t *); + typedef struct uc_program { uc_value_t header; uc_value_list_t constants; uc_weakref_t functions; - uc_source_t *source; + uc_sources_t sources; } uc_program_t; diff --git a/lib.c b/lib.c index 0dee0734..779e3f5e 100644 --- a/lib.c +++ b/lib.c @@ -2378,7 +2378,7 @@ uc_include_common(uc_vm_t *vm, size_t nargs, bool raw_mode) if (!closure) return NULL; - p = include_path(closure->function->program->source->runpath, ucv_string_get(path)); + p = include_path(uc_program_function_source(closure->function)->runpath, ucv_string_get(path)); if (!p) { uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, @@ -2896,7 +2896,7 @@ uc_sourcepath(uc_vm_t *vm, size_t nargs) continue; } - path = realpath(frame->closure->function->program->source->runpath, NULL); + path = realpath(uc_program_function_source(frame->closure->function)->runpath, NULL); break; } diff --git a/program.c b/program.c index 57252f8c..4321409d 100644 --- a/program.c +++ b/program.c @@ -25,7 +25,7 @@ uc_program_t * -uc_program_new(uc_source_t *source) +uc_program_new(void) { 
uc_program_t *prog; @@ -37,15 +37,13 @@ uc_program_new(uc_source_t *source) prog->functions.next = &prog->functions; prog->functions.prev = &prog->functions; - prog->source = uc_source_get(source); - uc_vallist_init(&prog->constants); return prog; } uc_function_t * -uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) +uc_program_function_new(uc_program_t *prog, const char *name, uc_source_t *source, size_t srcpos) { uc_function_t *func; size_t namelen = 0; @@ -58,6 +56,13 @@ uc_program_function_new(uc_program_t *prog, const char *name, size_t srcpos) if (name) strcpy(func->name, name); + for (func->srcidx = 0; func->srcidx < prog->sources.count; func->srcidx++) + if (prog->sources.entries[func->srcidx] == source) + break; + + if (func->srcidx >= prog->sources.count) + uc_vector_push(&prog->sources, uc_source_get(source)); + func->nargs = 0; func->nupvals = 0; func->srcpos = srcpos; @@ -97,6 +102,14 @@ uc_program_function_load(uc_program_t *prog, size_t id) return NULL; } +uc_source_t * +uc_program_function_source(uc_function_t *fn) +{ + assert(fn->srcidx < fn->program->sources.count); + + return fn->program->sources.entries[fn->srcidx]; +} + size_t uc_program_function_srcpos(uc_function_t *fn, size_t off) { @@ -294,6 +307,7 @@ write_function(uc_function_t *func, FILE *file, bool debug) write_u16(func->nargs, file); write_u16(func->nupvals, file); + write_u32(func->srcidx, file); write_u32(func->srcpos, file); write_chunk(&func->chunk, file, flags); @@ -308,33 +322,38 @@ uc_program_write(uc_program_t *prog, FILE *file, bool debug) if (debug) flags |= UC_PROGRAM_F_DEBUG; - if (debug && prog->source) { + if (debug && prog->sources.count) flags |= UC_PROGRAM_F_SOURCEINFO; - if (prog->source->buffer) - flags |= UC_PROGRAM_F_SOURCEBUF; - } - /* magic word + flags */ write_u32(UC_PRECOMPILED_BYTECODE_MAGIC, file); write_u32(flags, file); + /* write source information */ if (flags & UC_PROGRAM_F_SOURCEINFO) { - /* write source file name */ - 
write_string(prog->source->filename, file); + write_u32(prog->sources.count, file); - /* include source buffer if program was compiled from stdin */ - if (flags & UC_PROGRAM_F_SOURCEBUF) - write_string(prog->source->buffer, file); + for (i = 0; i < prog->sources.count; i++) { + /* write source file name */ + write_string(prog->sources.entries[i]->filename, file); - /* write lineinfo data */ - write_vector(&prog->source->lineinfo, file); + /* include source buffer if program was compiled from stdin */ + if (prog->sources.entries[i]->buffer) + write_string(prog->sources.entries[i]->buffer, file); + else + //write_string("", file); + write_u32(0, file); + + /* write lineinfo data */ + write_vector(&prog->sources.entries[i]->lineinfo, file); + } } /* write constants */ write_vallist(&prog->constants, file); /* write program sections */ + i = 0; uc_program_function_foreach(prog, fn1) { (void)fn1; i++; @@ -542,57 +561,73 @@ read_vallist(FILE *file, uc_value_list_t *vallist, const char *subj, char **errp } static uc_source_t * -read_sourceinfo(uc_source_t *input, uint32_t flags, char **errp) +read_sourceinfo(uc_source_t *input, uint32_t flags, char **errp, uc_program_t *program) { char *path = NULL, *code = NULL; uc_source_t *source = NULL; - size_t len; + size_t len, count; if (flags & UC_PROGRAM_F_SOURCEINFO) { - if (!read_size_t(input->fp, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) - goto out; + if (!read_size_t(input->fp, &count, sizeof(uint32_t), "amount of source entries", errp)) + return NULL; - path = xalloc(len); + while (count > 0) { + if (!read_size_t(input->fp, &len, sizeof(uint32_t), "sourceinfo filename length", errp)) + return NULL; - if (!read_string(input->fp, path, len, "sourceinfo filename", errp)) - goto out; + path = xalloc(len); + + if (!read_string(input->fp, path, len, "sourceinfo filename", errp)) { + free(path); + + return NULL; + } - if (flags & UC_PROGRAM_F_SOURCEBUF) { if (!read_size_t(input->fp, &len, sizeof(uint32_t), 
"sourceinfo code buffer length", errp)) - goto out; + return NULL; - code = xalloc(len); + if (len > 0) { + code = xalloc(len); - if (!read_string(input->fp, code, len, "sourceinfo code buffer data", errp)) { - free(code); - goto out; + if (!read_string(input->fp, code, len, "sourceinfo code buffer data", errp)) { + free(code); + free(path); + + return NULL; + } + + source = uc_source_new_buffer(path, code, len); } + else { + source = uc_source_new_file(path); - source = uc_source_new_buffer(path, code, len); - } - else { - source = uc_source_new_file(path); + if (!source) { + fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); + source = uc_source_new_buffer(path, xstrdup(""), 0); + } + } + + if (!read_vector(input->fp, &source->lineinfo, "sourceinfo lineinfo", errp)) { + uc_source_put(source); + free(path); - if (!source) { - fprintf(stderr, "Unable to open source file %s: %s\n", path, strerror(errno)); - source = uc_source_new_buffer(path, xstrdup(""), 0); + return NULL; } - } - if (!read_vector(input->fp, &source->lineinfo, "sourceinfo lineinfo", errp)) { - uc_source_put(source); - source = NULL; - goto out; + uc_source_runpath_set(source, input->runpath); + uc_vector_push(&program->sources, source); + + free(path); + + count--; } } else { source = uc_source_new_buffer("[no source]", xstrdup(""), 0); - } - - uc_source_runpath_set(source, input->runpath); -out: - free(path); + uc_source_runpath_set(source, input->runpath); + uc_vector_push(&program->sources, source); + } return source; } @@ -701,8 +736,10 @@ read_chunk(FILE *file, uc_chunk_t *chunk, uint32_t flags, const char *subj, char static bool read_function(FILE *file, uc_program_t *program, size_t idx, char **errp) { + size_t nargs, nupvals, srcidx, srcpos; char subjbuf[64], *name = NULL; uc_function_t *func = NULL; + uc_source_t *source; uint32_t flags, u32; snprintf(subjbuf, sizeof(subjbuf), "function #%zu flags", idx); @@ -726,15 +763,25 @@ read_function(FILE *file, 
uc_program_t *program, size_t idx, char **errp) snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) arg count and offset", idx, name ? name : "-"); - func = (uc_function_t *)uc_program_function_new(program, name, 0); + if (!read_size_t(file, &nargs, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &nupvals, sizeof(uint16_t), subjbuf, errp) || + !read_size_t(file, &srcidx, sizeof(uint32_t), subjbuf, errp) || + !read_size_t(file, &srcpos, sizeof(uint32_t), subjbuf, errp)) { + goto out; + } + + // FIXME + if (srcidx < program->sources.count) + source = program->sources.entries[srcidx]; + else + source = program->sources.entries[0]; + + func = (uc_function_t *)uc_program_function_new(program, name, source, srcpos); func->arrow = (flags & UC_FUNCTION_F_IS_ARROW); func->vararg = (flags & UC_FUNCTION_F_IS_VARARG); func->strict = (flags & UC_FUNCTION_F_IS_STRICT); - - if (!read_size_t(file, &func->nargs, sizeof(uint16_t), subjbuf, errp) || - !read_size_t(file, &func->nupvals, sizeof(uint16_t), subjbuf, errp) || - !read_size_t(file, &func->srcpos, sizeof(uint32_t), subjbuf, errp)) - goto out; + func->nargs = nargs; + func->nupvals = nupvals; snprintf(subjbuf, sizeof(subjbuf), "function #%zu (%s) body", idx, name ? 
name : "-"); @@ -755,7 +802,6 @@ uc_program_t * uc_program_load(uc_source_t *input, char **errp) { uc_program_t *program = NULL; - uc_source_t *source = NULL; uint32_t flags, nfuncs, i; if (!read_u32(input->fp, &i, "file magic", errp)) @@ -769,15 +815,11 @@ uc_program_load(uc_source_t *input, char **errp) if (!read_u32(input->fp, &flags, "program flags", errp)) goto out; - source = read_sourceinfo(input, flags, errp); + program = uc_program_new(); - if (!source) + if (!read_sourceinfo(input, flags, errp, program)) goto out; - program = uc_program_new(source); - - uc_source_put(source); - if (!read_vallist(input->fp, &program->constants, "constants", errp)) goto out; diff --git a/types.c b/types.c index 2fba2078..d0b933d0 100644 --- a/types.c +++ b/types.c @@ -191,8 +191,8 @@ ucv_gc_mark(uc_value_t *uv) case UC_PROGRAM: program = (uc_program_t *)uv; - if (program->source) - ucv_gc_mark(&program->source->header); + for (i = 0; i < program->sources.count; i++) + ucv_gc_mark(&program->sources.entries[i]->header); break; @@ -283,7 +283,11 @@ ucv_free(uc_value_t *uv, bool retain) uc_program_function_free(func); uc_vallist_free(&program->constants); - ucv_put_value(&program->source->header, retain); + + for (i = 0; i < program->sources.count; i++) + ucv_put_value(&program->sources.entries[i]->header, retain); + + uc_vector_clear(&program->sources); break; case UC_SOURCE: diff --git a/vm.c b/vm.c index 29ace389..0529ee33 100644 --- a/vm.c +++ b/vm.c @@ -211,7 +211,7 @@ uc_vm_frame_program(uc_callframe_t *frame) static uc_source_t * uc_vm_frame_source(uc_callframe_t *frame) { - return frame->closure ? frame->closure->function->program->source : NULL; + return frame->closure ? 
uc_program_function_source(frame->closure->function) : NULL; } static uc_callframe_t * @@ -829,7 +829,7 @@ uc_vm_capture_stacktrace(uc_vm_t *vm, size_t i) if (frame->closure) { function = frame->closure->function; - source = function->program->source; + source = uc_program_function_source(function); off = (frame->ip - uc_vm_frame_chunk(frame)->entries) - 1; srcpos = uc_program_function_srcpos(function, off); From 50cf5723f9d41a5a65a6f5d38f8dfff4ff9422a5 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 14:52:19 +0200 Subject: [PATCH 10/21] program: add function to globally lookup exported name Add a helper function to query the global index of a named export within a specific source which is a prerequisite for compiling import statements. Signed-off-by: Jo-Philipp Wich --- include/ucode/program.h | 1 + program.c | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/ucode/program.h b/include/ucode/program.h index e5408077..4fb4a9b2 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -53,6 +53,7 @@ uc_source_t *uc_program_function_source(uc_function_t *); size_t uc_program_function_srcpos(uc_function_t *, size_t); void uc_program_function_free(uc_function_t *); +ssize_t uc_program_export_lookup(uc_program_t *, uc_source_t *, uc_value_t *); uc_value_t *uc_program_get_constant(uc_program_t *, size_t); ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); diff --git a/program.c b/program.c index 4321409d..f08e0cd0 100644 --- a/program.c +++ b/program.c @@ -846,3 +846,24 @@ uc_program_entry(uc_program_t *program) return (uc_function_t *)program->functions.prev; } + +ssize_t +uc_program_export_lookup(uc_program_t *program, uc_source_t *source, uc_value_t *name) +{ + size_t i, off; + ssize_t slot; + + for (i = 0, off = 0; i < program->sources.count; i++) { + if (program->sources.entries[i] != source) { + off += program->sources.entries[i]->exports.count; + continue; + } + + slot = 
uc_source_export_lookup(source, name); + + if (slot > -1) + return off + slot; + } + + return -1; +} From 341896786c604d3f37e3095cdef16d786192f014 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 09:22:40 +0200 Subject: [PATCH 11/21] vm: gracefully handle unresolved upvalues Upcoming module support will rely on unresolved upvalues which are patched at runtime to realize module imports; make sure the VM trace code does not choke on such unresolved upvalues. Signed-off-by: Jo-Philipp Wich --- vm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vm.c b/vm.c index 0529ee33..d0fe80e6 100644 --- a/vm.c +++ b/vm.c @@ -345,7 +345,10 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) fprintf(stderr, " [%zu] <%p> %s ", i, (void *)ref, uc_vm_format_val(vm, v)); - if (ref->closed) { + if (!ref) { + fprintf(stderr, "{unresolved}\n"); + } + else if (ref->closed) { fprintf(stderr, "{closed} %s\n", uc_vm_format_val(vm, ref->value)); } From 6becc643230180c8985d135007c344a2fa966552 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 11:16:34 +0200 Subject: [PATCH 12/21] vm: transparently resolve upvalue references Resolve upvalue references to their actual values when pushing such references onto the stack (or when attempting to call them as a method). This allows constructing objects of pointers, as needed for wildcard module imports. Signed-off-by: Jo-Philipp Wich --- vm.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/vm.c b/vm.c index d0fe80e6..61fc7f3d 100644 --- a/vm.c +++ b/vm.c @@ -363,20 +363,43 @@ uc_vm_frame_dump(uc_vm_t *vm, uc_callframe_t *frame) } } +static uc_value_t * +uc_vm_resolve_upval(uc_vm_t *vm, uc_value_t *value) +{ + uc_upvalref_t *ref; + +#ifdef __clang_analyzer__ + /* Clang static analyzer does not understand that ucv_type(NULL) can't + * possibly yield UC_UPVALUE. Nudge it.
*/ + if (value != NULL && ucv_type(value) == UC_UPVALUE) +#else + if (ucv_type(value) == UC_UPVALUE) +#endif + { + ref = (uc_upvalref_t *)value; + + if (ref->closed) + return ucv_get(ref->value); + else + return ucv_get(vm->stack.entries[ref->slot]); + } + + return value; +} + void uc_vm_stack_push(uc_vm_t *vm, uc_value_t *value) { uc_vector_grow(&vm->stack); ucv_put(vm->stack.entries[vm->stack.count]); - - vm->stack.entries[vm->stack.count] = value; + vm->stack.entries[vm->stack.count] = uc_vm_resolve_upval(vm, value); vm->stack.count++; if (vm->trace) { fprintf(stderr, " [+%zd] %s\n", vm->stack.count - 1, - uc_vm_format_val(vm, value)); + uc_vm_format_val(vm, vm->stack.entries[vm->stack.count - 1])); } } @@ -2230,7 +2253,7 @@ uc_vm_insn_mcall(uc_vm_t *vm, uc_vm_insn_t insn) size_t key_slot = vm->stack.count - (vm->arg.u32 & 0xffff) - 1; uc_value_t *ctx = vm->stack.entries[key_slot - 1]; uc_value_t *key = vm->stack.entries[key_slot]; - uc_value_t *fno = ucv_key_get(vm, ctx, key); + uc_value_t *fno = uc_vm_resolve_upval(vm, ucv_key_get(vm, ctx, key)); if (!ucv_is_callable(fno) && insn == I_QMCALL) return uc_vm_skip_call(vm, true); From 365782e002255c67b81cf96471fe41cfa6f6b714 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 10:47:58 +0200 Subject: [PATCH 13/21] vm: honor constant flag of objects and arrays Reject modifications on object and array values with a type exception when the constant flag is set on the value operated upon. 
Signed-off-by: Jo-Philipp Wich --- vm.c | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/vm.c b/vm.c index 61fc7f3d..7e8b12af 100644 --- a/vm.c +++ b/vm.c @@ -1230,6 +1230,21 @@ uc_vm_insn_store_var(uc_vm_t *vm, uc_vm_insn_t insn) uc_vm_stack_push(vm, v); } +static bool +assert_mutable_value(uc_vm_t *vm, uc_value_t *val) +{ + if (ucv_is_constant(val)) { + uc_vm_stack_push(vm, NULL); + uc_vm_raise_exception(vm, EXCEPTION_TYPE, + "%s value is immutable", + ucv_typename(val)); + + return false; + } + + return true; +} + static void uc_vm_insn_store_val(uc_vm_t *vm, uc_vm_insn_t insn) { @@ -1240,7 +1255,9 @@ uc_vm_insn_store_val(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(o)) { case UC_OBJECT: case UC_ARRAY: - uc_vm_stack_push(vm, ucv_key_set(vm, o, k, v)); + if (assert_mutable_value(vm, o)) + uc_vm_stack_push(vm, ucv_key_set(vm, o, k, v)); + break; default: @@ -1710,8 +1727,11 @@ uc_vm_insn_update_val(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(v)) { case UC_OBJECT: case UC_ARRAY: - val = ucv_key_get(vm, v, k); - uc_vm_stack_push(vm, ucv_key_set(vm, v, k, uc_vm_value_arith(vm, vm->arg.u8, val, inc))); + if (assert_mutable_value(vm, v)) { + val = ucv_key_get(vm, v, k); + uc_vm_stack_push(vm, ucv_key_set(vm, v, k, uc_vm_value_arith(vm, vm->arg.u8, val, inc))); + } + break; default: @@ -2306,8 +2326,11 @@ uc_vm_insn_delete(uc_vm_t *vm, uc_vm_insn_t insn) switch (ucv_type(v)) { case UC_OBJECT: - rv = ucv_key_delete(vm, v, k); - uc_vm_stack_push(vm, ucv_boolean_new(rv)); + if (assert_mutable_value(vm, v)) { + rv = ucv_key_delete(vm, v, k); + uc_vm_stack_push(vm, ucv_boolean_new(rv)); + } + break; default: From d85bc716df9b97ac6093afa0bdf77c1b6b0cf6aa Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 10:28:47 +0200 Subject: [PATCH 14/21] vm: introduce import and export opcodes Introduce new opcodes to realize module imports and exports. 
The export operation will capture a local variable as upvalue and store it in VM wide module export registry while the import operation will connect an upvalue from the module export registry with a preallocated upvalue in the running function scope. Signed-off-by: Jo-Philipp Wich --- include/ucode/types.h | 2 ++ include/ucode/vm.h | 4 ++- vm.c | 83 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) diff --git a/include/ucode/types.h b/include/ucode/types.h index 14f2a43b..b8107440 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -254,6 +254,7 @@ typedef struct { uc_declare_vector(uc_callframes_t, uc_callframe_t); uc_declare_vector(uc_stack_t, uc_value_t *); +uc_declare_vector(uc_modexports_t, uc_upvalref_t *); typedef struct printbuf uc_stringbuf_t; @@ -270,6 +271,7 @@ struct uc_vm { uc_source_t *sources; uc_weakref_t values; uc_resource_types_t restypes; + uc_modexports_t exports; union { uint32_t u32; int32_t s32; diff --git a/include/ucode/vm.h b/include/ucode/vm.h index 83774467..cc57fdb2 100644 --- a/include/ucode/vm.h +++ b/include/ucode/vm.h @@ -95,7 +95,9 @@ __insn(QMCALL) \ __insn(PRINT) \ __insn(NEXTK) \ __insn(NEXTKV) \ -__insn(DELETE) +__insn(DELETE) \ +__insn(IMPORT) \ +__insn(EXPORT) #undef __insn diff --git a/vm.c b/vm.c index 7e8b12af..1ef3bddf 100644 --- a/vm.c +++ b/vm.c @@ -73,6 +73,9 @@ static const int8_t insn_operand_bytes[__I_MAX] = { [I_MCALL] = 4, [I_QCALL] = 4, [I_QMCALL] = 4, + + [I_IMPORT] = 4, + [I_EXPORT] = 4 }; static const char *exception_type_strings[] = { @@ -181,6 +184,9 @@ void uc_vm_free(uc_vm_t *vm) for (i = 0; i < vm->restypes.count; i++) ucv_put(vm->restypes.entries[i]->proto); + for (i = 0; i < vm->exports.count; i++) + ucv_put(&vm->exports.entries[i]->header); + uc_vm_reset_callframes(vm); uc_vm_reset_stack(vm); uc_vector_clear(&vm->stack); @@ -194,6 +200,7 @@ void uc_vm_free(uc_vm_t *vm) free(vm->restypes.entries[i]); uc_vector_clear(&vm->restypes); + 
uc_vector_clear(&vm->exports); } static uc_chunk_t * @@ -2345,6 +2352,74 @@ uc_vm_insn_delete(uc_vm_t *vm, uc_vm_insn_t insn) ucv_put(v); } +static void +uc_vm_insn_import(uc_vm_t *vm, uc_vm_insn_t insn) +{ + uc_callframe_t *frame = uc_vm_current_frame(vm); + uint16_t from = vm->arg.u32 & 0xffff; + uint16_t to = vm->arg.u32 >> 16; + uc_value_t *name, *modobj; + uint32_t cidx; + + /* is a wildcard import * from ... */ + if (to == 0xffff) { + to = from; + modobj = ucv_object_new(vm); + + /* instruction is followed by u16 containing the offset of the + * first module export and `from` times u32 values containing + * the constant indexes of the names */ + for (from = frame->ip[0] * 0x100 + frame->ip[1], frame->ip += 2; + from < to && from < vm->exports.count; + from++) { + + cidx = ( + frame->ip[0] * 0x1000000UL + + frame->ip[1] * 0x10000UL + + frame->ip[2] * 0x100UL + + frame->ip[3] + ); + + frame->ip += 4; + + name = uc_program_get_constant(uc_vm_current_program(vm), cidx); + + if (ucv_type(name) == UC_STRING && vm->exports.entries[from]) + ucv_object_add(modobj, ucv_string_get(name), + ucv_get(&vm->exports.entries[from]->header)); + + ucv_put(name); + } + + ucv_set_constant(modobj, true); + + uc_vm_stack_push(vm, modobj); + } + + /* module export available, patch into upvalue */ + else if (from < vm->exports.count && vm->exports.entries[from]) { + frame->closure->upvals[to] = vm->exports.entries[from]; + ucv_get(&vm->exports.entries[from]->header); + } + + /* module export missing, e.g. 
due to premature return in module, + * patch up dummy upvalue ref with `null` value */ + else { + frame->closure->upvals[to] = (uc_upvalref_t *)ucv_upvalref_new(0); + frame->closure->upvals[to]->closed = true; + } +} + +static void +uc_vm_insn_export(uc_vm_t *vm, uc_vm_insn_t insn) +{ + uc_callframe_t *frame = uc_vm_current_frame(vm); + uc_upvalref_t *ref = uc_vm_capture_upval(vm, frame->stackframe + vm->arg.u32); + + uc_vector_push(&vm->exports, ref); + ucv_get(&ref->header); +} + static uc_value_t * uc_vm_callframe_pop(uc_vm_t *vm) { @@ -2632,6 +2707,14 @@ uc_vm_execute_chunk(uc_vm_t *vm) uc_vm_insn_delete(vm, insn); break; + case I_IMPORT: + uc_vm_insn_import(vm, insn); + break; + + case I_EXPORT: + uc_vm_insn_export(vm, insn); + break; + default: uc_vm_raise_exception(vm, EXCEPTION_RUNTIME, "unknown opcode %d", insn); break; From 3c168b5184ebd217ea276bf374d28bbf937681fd Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 21 Jul 2022 10:49:23 +0200 Subject: [PATCH 15/21] vm, cli: move search path into global configuration structure The upcoming compile-time module support will require the configured extension search path in the compiler as well, so move it to the already shared uc_parse_config_t structure and add the appropriate utility functions to initialize, append and free the search path vector. 
Signed-off-by: Jo-Philipp Wich --- include/ucode/types.h | 20 +++++++++++++++ main.c | 60 ++++++++++++++++++++++++++----------------- types.c | 18 +++++++++++++ vm.c | 7 +++-- 4 files changed, 77 insertions(+), 28 deletions(-) diff --git a/include/ucode/types.h b/include/ucode/types.h index b8107440..c32829f2 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -217,13 +217,33 @@ typedef struct uc_program { /* Parser definitions */ +uc_declare_vector(uc_search_path_t, char *); + typedef struct { bool lstrip_blocks; bool trim_blocks; bool strict_declarations; bool raw_mode; + uc_search_path_t module_search_path; } uc_parse_config_t; +extern uc_parse_config_t uc_default_parse_config; + +void uc_search_path_init(uc_search_path_t *search_path); + +static inline void +uc_search_path_add(uc_search_path_t *search_path, char *path) { + uc_vector_push(search_path, xstrdup(path)); +} + +static inline void +uc_search_path_free(uc_search_path_t *search_path) { + while (search_path->count > 0) + free(search_path->entries[--search_path->count]); + + uc_vector_clear(search_path); +} + /* VM definitions */ diff --git a/main.c b/main.c index d98b5d5e..087efff5 100644 --- a/main.c +++ b/main.c @@ -387,14 +387,13 @@ parse_define_string(char *opt, uc_value_t *globals) } static void -parse_search_path(char *pattern, uc_value_t *globals) +parse_search_path(char *pattern, uc_parse_config_t *config) { - uc_value_t *rsp = ucv_object_get(globals, "REQUIRE_SEARCH_PATH", NULL); size_t len; char *p; if (strchr(pattern, '*')) { - ucv_array_push(rsp, ucv_string_new(pattern)); + uc_search_path_add(&config->module_search_path, pattern); return; } @@ -407,11 +406,11 @@ parse_search_path(char *pattern, uc_value_t *globals) pattern[--len] = 0; xasprintf(&p, "%s/*.so", pattern); - ucv_array_push(rsp, ucv_string_new(p)); + uc_search_path_add(&config->module_search_path, p); free(p); xasprintf(&p, "%s/*.uc", pattern); - ucv_array_push(rsp, ucv_string_new(p)); + 
uc_search_path_add(&config->module_search_path, p); free(p); } @@ -462,6 +461,7 @@ appname(const char *argv0) int main(int argc, char **argv) { + const char *optspec = "he:tST::RD:F:U:l:L:c::o:s"; char *interp = "/usr/bin/env ucode"; uc_source_t *source = NULL; FILE *precompile = NULL; @@ -480,6 +480,8 @@ main(int argc, char **argv) .raw_mode = true }; + uc_search_path_init(&config.module_search_path); + app = appname(argv[0]); if (argc == 1) { @@ -494,6 +496,31 @@ main(int argc, char **argv) stdin_unused = stdin; + /* parse options iteration 1: parse config related options */ + while ((opt = getopt(argc, argv, optspec)) != -1) + { + switch (opt) { + case 'L': + parse_search_path(optarg, &config); + break; + + case 'S': + config.strict_declarations = true; + break; + + case 'R': + config.raw_mode = true; + break; + + case 'T': + config.raw_mode = false; + parse_template_modeflags(optarg, &config); + break; + } + } + + optind = 1; + uc_vm_init(&vm, &config); /* load std functions into global scope */ @@ -504,8 +531,8 @@ main(int argc, char **argv) ucv_object_add(uc_vm_scope_get(&vm), "ARGV", ucv_get(o)); - /* parse options */ - while ((opt = getopt(argc, argv, "he:tST::RD:F:U:l:L:c::o:s")) != -1) + /* parse options iteration 2: process remaining options */ + while ((opt = getopt(argc, argv, optspec)) != -1) { switch (opt) { case 'h': @@ -520,19 +547,6 @@ main(int argc, char **argv) uc_vm_trace_set(&vm, 1); break; - case 'S': - config.strict_declarations = true; - break; - - case 'R': - config.raw_mode = true; - break; - - case 'T': - config.raw_mode = false; - parse_template_modeflags(optarg, &config); - break; - case 'D': if (!parse_define_string(optarg, uc_vm_scope_get(&vm))) { rv = 1; @@ -553,10 +567,6 @@ main(int argc, char **argv) ucv_object_delete(uc_vm_scope_get(&vm), optarg); break; - case 'L': - parse_search_path(optarg, uc_vm_scope_get(&vm)); - break; - case 'l': if (!parse_library_load(optarg, &vm)) { rv = 1; @@ -629,6 +639,8 @@ main(int argc, char 
**argv) rv = compile(&vm, source, precompile, strip, interp); out: + uc_search_path_free(&config.module_search_path); + uc_source_put(source); uc_vm_free(&vm); diff --git a/types.c b/types.c index d0b933d0..3a3e35c9 100644 --- a/types.c +++ b/types.c @@ -30,6 +30,15 @@ #include "ucode/vm.h" #include "ucode/program.h" +static char *uc_default_search_path[] = { LIB_SEARCH_PATH }; + +uc_parse_config_t uc_default_parse_config = { + .module_search_path = { + .count = ARRAY_SIZE(uc_default_search_path), + .entries = uc_default_search_path + } +}; + uc_type_t ucv_type(uc_value_t *uv) { @@ -2245,3 +2254,12 @@ ucv_freeall(uc_vm_t *vm) { ucv_gc_common(vm, true); } + +void +uc_search_path_init(uc_search_path_t *search_path) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(uc_default_search_path); i++) + uc_vector_push(search_path, xstrdup(uc_default_search_path[i])); +} diff --git a/vm.c b/vm.c index 1ef3bddf..ad5cd44b 100644 --- a/vm.c +++ b/vm.c @@ -111,7 +111,6 @@ uc_vm_reset_callframes(uc_vm_t *vm) static uc_value_t * uc_vm_alloc_global_scope(uc_vm_t *vm) { - const char *path[] = { LIB_SEARCH_PATH }; uc_value_t *scope, *arr; size_t i; @@ -120,8 +119,8 @@ uc_vm_alloc_global_scope(uc_vm_t *vm) /* build default require() search path */ arr = ucv_array_new(vm); - for (i = 0; i < ARRAY_SIZE(path); i++) - ucv_array_push(arr, ucv_string_new(path[i])); + for (i = 0; i < vm->config->module_search_path.count; i++) + ucv_array_push(arr, ucv_string_new(vm->config->module_search_path.entries[i])); /* register module related constants */ ucv_object_add(scope, "REQUIRE_SEARCH_PATH", arr); @@ -147,7 +146,7 @@ void uc_vm_init(uc_vm_t *vm, uc_parse_config_t *config) vm->exception.type = EXCEPTION_NONE; vm->exception.message = NULL; - vm->config = config; + vm->config = config ? 
config : &uc_default_parse_config; vm->open_upvals = NULL; From e1c3db05168d0417b3ac5465517c7e6957b28bb5 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 20:20:57 +0200 Subject: [PATCH 16/21] tests: run_tests.sh: substitute dynamic test directory path in output Replace all occurrences of the test file directory path with "." in stderr and stdout results to ensure stable test outputs. Signed-off-by: Jo-Philipp Wich --- tests/custom/run_tests.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/custom/run_tests.sh b/tests/custom/run_tests.sh index fb92379d..96ac783e 100755 --- a/tests/custom/run_tests.sh +++ b/tests/custom/run_tests.sh @@ -105,6 +105,8 @@ run_testcase() { printf "%d\n" $? > "$dir/res.code" touch "$dir/empty" + sed -i -e "s#$dir#.#g" "$dir/res.out" "$dir/res.err" + if ! cmp -s "$dir/res.err" "${err:-$dir/empty}"; then [ $fail = 0 ] && printf "!\n" printf "Testcase #%d: Expected stderr did not match:\n" $num From afd78c1f30ce5d32d0a8dbce72e76d7871903f2f Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Fri, 29 Jul 2022 12:25:54 +0200 Subject: [PATCH 17/21] compiler: fix reported source position in inc/dec operator error Report the proper source location when raising an error due to an increment/decrement operation on a constant value.
Signed-off-by: Jo-Philipp Wich --- compiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler.c b/compiler.c index d03ce5e2..26377d89 100644 --- a/compiler.c +++ b/compiler.c @@ -871,7 +871,7 @@ uc_compiler_emit_inc_dec(uc_compiler_t *compiler, uc_tokentype_t toktype, bool i varname = compiler->upvals.entries[cidx].name; if (varname) - uc_compiler_syntax_error(compiler, 0, + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, "Invalid increment/decrement of constant '%s'", ucv_string_get(varname)); From 78dfb08f7569904394249758b2d7a563366b60ff Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 09:15:59 +0200 Subject: [PATCH 18/21] compiler: require a name in function declarations So far we allowed anonymous toplevel function expressions which makes little sense since those can't be used for anything. Require toplevel function declarations to be named and turn a missing name into a compile time syntax error. Signed-off-by: Jo-Philipp Wich --- compiler.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/compiler.c b/compiler.c index 26377d89..db8917b3 100644 --- a/compiler.c +++ b/compiler.c @@ -36,7 +36,7 @@ static void uc_compiler_compile_constant(uc_compiler_t *compiler); static void uc_compiler_compile_template(uc_compiler_t *compiler); static void uc_compiler_compile_comma(uc_compiler_t *compiler); static void uc_compiler_compile_labelexpr(uc_compiler_t *compiler); -static void uc_compiler_compile_function(uc_compiler_t *compiler); +static void uc_compiler_compile_funcexpr(uc_compiler_t *compiler); static void uc_compiler_compile_and(uc_compiler_t *compiler); static void uc_compiler_compile_or(uc_compiler_t *compiler); static void uc_compiler_compile_nullish(uc_compiler_t *compiler); @@ -76,7 +76,7 @@ uc_compiler_parse_rules[TK_ERROR + 1] = { [TK_TEMPLATE] = { uc_compiler_compile_template, NULL, P_NONE }, [TK_COMMA] = { NULL, uc_compiler_compile_comma, P_COMMA }, 
[TK_LABEL] = { uc_compiler_compile_labelexpr, NULL, P_NONE }, - [TK_FUNC] = { uc_compiler_compile_function, NULL, P_NONE }, + [TK_FUNC] = { uc_compiler_compile_funcexpr, NULL, P_NONE }, [TK_AND] = { NULL, uc_compiler_compile_and, P_AND }, [TK_OR] = { NULL, uc_compiler_compile_or, P_OR }, [TK_NULLISH] = { NULL, uc_compiler_compile_nullish, P_OR }, @@ -1537,7 +1537,7 @@ uc_compiler_compile_delimitted_block(uc_compiler_t *compiler, uc_tokentype_t end } static void -uc_compiler_compile_function(uc_compiler_t *compiler) +uc_compiler_compile_funcexpr_common(uc_compiler_t *compiler, bool require_name) { uc_compiler_t fncompiler = { 0 }; uc_value_t *name = NULL; @@ -1560,6 +1560,9 @@ uc_compiler_compile_function(uc_compiler_t *compiler) if (slot == -1) uc_compiler_initialize_local(compiler); } + else if (require_name) { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, "Expecting function name"); + } uc_compiler_init(&fncompiler, name ? ucv_string_get(name) : NULL, @@ -1646,6 +1649,18 @@ uc_compiler_compile_function(uc_compiler_t *compiler) } } +static void +uc_compiler_compile_funcexpr(uc_compiler_t *compiler) +{ + return uc_compiler_compile_funcexpr_common(compiler, false); +} + +static void +uc_compiler_compile_funcdecl(uc_compiler_t *compiler) +{ + return uc_compiler_compile_funcexpr_common(compiler, true); +} + static void uc_compiler_compile_and(uc_compiler_t *compiler) { @@ -2877,7 +2892,7 @@ uc_compiler_compile_statement(uc_compiler_t *compiler) else if (uc_compiler_parse_match(compiler, TK_TRY)) uc_compiler_compile_try(compiler); else if (uc_compiler_parse_match(compiler, TK_FUNC)) - uc_compiler_compile_function(compiler); + uc_compiler_compile_funcdecl(compiler); else if (uc_compiler_parse_match(compiler, TK_BREAK)) uc_compiler_compile_control(compiler); else if (uc_compiler_parse_match(compiler, TK_CONTINUE)) From 862e49de33bd07daea129d553968579019c79b59 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Wed, 20 Jul 2022 09:38:16 +0200 Subject: 
[PATCH 19/21] compiler: resolve predeclared upvalues Do not require a parent function compiler reference to lookup an already declared (potentially unresolved) upvalue in the current scope. Instead, search the named upvalues in the current function scope in case there is no parent compiler reference. This is required for the upcoming module support which will use unresolved upvalues to realize import/export functionality. Signed-off-by: Jo-Philipp Wich --- compiler.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/compiler.c b/compiler.c index db8917b3..70d15d65 100644 --- a/compiler.c +++ b/compiler.c @@ -790,10 +790,22 @@ uc_compiler_add_upval(uc_compiler_t *compiler, size_t idx, bool local, uc_value_ static ssize_t uc_compiler_resolve_upval(uc_compiler_t *compiler, uc_value_t *name, bool *constant) { + uc_upvals_t *upvals = &compiler->upvals; + uc_upval_t *uv; ssize_t idx; + size_t i; + + if (!compiler->parent) { + for (i = 0, uv = upvals->entries; i < upvals->count; i++, uv = upvals->entries + i) { + if (ucv_is_equal(uv->name, name) && uv->local == false) { + *constant = uv->constant; + + return i; + } + } - if (!compiler->parent) return -1; + } idx = uc_compiler_resolve_local(compiler->parent, name, constant); From 10e056d3744384a029f05de5903c489898722fc3 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Sun, 17 Jul 2022 23:21:03 +0200 Subject: [PATCH 20/21] compiler: add support for import/export statements This commit introduces syntax level support for ES6 style module import and export statements. Imports are resolved at compile time and the corresponding module code is compiled into the main program. Also add testcases to cover import and export statement semantics. 
Signed-off-by: Jo-Philipp Wich --- compiler.c | 546 +++++++++++++++++- include/ucode/program.h | 2 + .../04_modules/01_export_variable_declaration | 29 + .../04_modules/02_export_function_declaration | 22 + tests/custom/04_modules/03_export_list | 27 + tests/custom/04_modules/04_export_rename | 28 + tests/custom/04_modules/05_export_default | 38 ++ tests/custom/04_modules/06_export_errors | 89 +++ tests/custom/04_modules/07_import_default | 99 ++++ tests/custom/04_modules/08_import_list | 105 ++++ tests/custom/04_modules/09_import_wildcard | 73 +++ tests/custom/04_modules/10_import_none | 18 + .../04_modules/11_import_many_exec_once | 28 + .../custom/04_modules/12_import_immutability | 52 ++ tests/custom/04_modules/13_import_liveness | 29 + .../01_try_catch_stack_mismatch | 0 .../02_array_pop_use_after_free | 0 .../03_switch_fallthrough_miscompilation | 0 .../04_property_set_abort | 0 .../05_duplicate_resource_type | 0 .../06_lexer_escape_at_boundary | 0 .../07_lexer_overlong_lines | 0 .../08_compiler_arrow_fn_expressions | 0 .../09_reject_invalid_array_indexes | 0 .../10_break_stack_mismatch | 0 .../11_switch_stack_mismatch | 0 .../12_altblock_stack_mismatch | 0 .../13_split_by_string_leading_trailing | 0 .../14_incomplete_expression_at_eof | 0 .../15_segfault_on_prefix_increment | 0 .../16_hang_on_regexp_at_eof | 0 .../17_hang_on_unclosed_expression_block | 0 .../18_hang_on_line_comments_at_eof | 0 .../19_truncated_format_string | 0 .../20_use_strict_stack_mismatch | 0 .../21_compiler_parenthesized_prop_keyword | 0 .../22_compiler_break_continue_scoping | 0 .../23_compiler_parenthesized_division | 0 .../24_compiler_local_for_loop_declaration | 0 .../25_lexer_shifted_offsets | 0 .../26_compiler_jmp_to_zero | 0 .../27_invalid_sparse_array_set | 0 .../{04_bugs => 99_bugs}/28_null_equality | 0 .../29_empty_string_as_number | 0 .../30_nan_strict_equality | 0 .../31_vallist_8bit_shortstrings | 0 .../32_compiler_switch_patchlist_corruption | 0 
.../33_vm_computed_prop_decl_crash | 0 .../34_dirname_off_by_one | 0 .../35_vm_callframe_double_free | 0 .../36_vm_nested_call_return | 0 .../37_compiler_unexpected_unary_op | 0 .../{04_bugs => 99_bugs}/38_index_segfault | 0 .../39_compiler_switch_continue_mismatch | 0 .../40_lexer_bug_on_lstrip_off | 0 55 files changed, 1175 insertions(+), 10 deletions(-) create mode 100644 tests/custom/04_modules/01_export_variable_declaration create mode 100644 tests/custom/04_modules/02_export_function_declaration create mode 100644 tests/custom/04_modules/03_export_list create mode 100644 tests/custom/04_modules/04_export_rename create mode 100644 tests/custom/04_modules/05_export_default create mode 100644 tests/custom/04_modules/06_export_errors create mode 100644 tests/custom/04_modules/07_import_default create mode 100644 tests/custom/04_modules/08_import_list create mode 100644 tests/custom/04_modules/09_import_wildcard create mode 100644 tests/custom/04_modules/10_import_none create mode 100644 tests/custom/04_modules/11_import_many_exec_once create mode 100644 tests/custom/04_modules/12_import_immutability create mode 100644 tests/custom/04_modules/13_import_liveness rename tests/custom/{04_bugs => 99_bugs}/01_try_catch_stack_mismatch (100%) rename tests/custom/{04_bugs => 99_bugs}/02_array_pop_use_after_free (100%) rename tests/custom/{04_bugs => 99_bugs}/03_switch_fallthrough_miscompilation (100%) rename tests/custom/{04_bugs => 99_bugs}/04_property_set_abort (100%) rename tests/custom/{04_bugs => 99_bugs}/05_duplicate_resource_type (100%) rename tests/custom/{04_bugs => 99_bugs}/06_lexer_escape_at_boundary (100%) rename tests/custom/{04_bugs => 99_bugs}/07_lexer_overlong_lines (100%) rename tests/custom/{04_bugs => 99_bugs}/08_compiler_arrow_fn_expressions (100%) rename tests/custom/{04_bugs => 99_bugs}/09_reject_invalid_array_indexes (100%) rename tests/custom/{04_bugs => 99_bugs}/10_break_stack_mismatch (100%) rename tests/custom/{04_bugs => 
99_bugs}/11_switch_stack_mismatch (100%) rename tests/custom/{04_bugs => 99_bugs}/12_altblock_stack_mismatch (100%) rename tests/custom/{04_bugs => 99_bugs}/13_split_by_string_leading_trailing (100%) rename tests/custom/{04_bugs => 99_bugs}/14_incomplete_expression_at_eof (100%) rename tests/custom/{04_bugs => 99_bugs}/15_segfault_on_prefix_increment (100%) rename tests/custom/{04_bugs => 99_bugs}/16_hang_on_regexp_at_eof (100%) rename tests/custom/{04_bugs => 99_bugs}/17_hang_on_unclosed_expression_block (100%) rename tests/custom/{04_bugs => 99_bugs}/18_hang_on_line_comments_at_eof (100%) rename tests/custom/{04_bugs => 99_bugs}/19_truncated_format_string (100%) rename tests/custom/{04_bugs => 99_bugs}/20_use_strict_stack_mismatch (100%) rename tests/custom/{04_bugs => 99_bugs}/21_compiler_parenthesized_prop_keyword (100%) rename tests/custom/{04_bugs => 99_bugs}/22_compiler_break_continue_scoping (100%) rename tests/custom/{04_bugs => 99_bugs}/23_compiler_parenthesized_division (100%) rename tests/custom/{04_bugs => 99_bugs}/24_compiler_local_for_loop_declaration (100%) rename tests/custom/{04_bugs => 99_bugs}/25_lexer_shifted_offsets (100%) rename tests/custom/{04_bugs => 99_bugs}/26_compiler_jmp_to_zero (100%) rename tests/custom/{04_bugs => 99_bugs}/27_invalid_sparse_array_set (100%) rename tests/custom/{04_bugs => 99_bugs}/28_null_equality (100%) rename tests/custom/{04_bugs => 99_bugs}/29_empty_string_as_number (100%) rename tests/custom/{04_bugs => 99_bugs}/30_nan_strict_equality (100%) rename tests/custom/{04_bugs => 99_bugs}/31_vallist_8bit_shortstrings (100%) rename tests/custom/{04_bugs => 99_bugs}/32_compiler_switch_patchlist_corruption (100%) rename tests/custom/{04_bugs => 99_bugs}/33_vm_computed_prop_decl_crash (100%) rename tests/custom/{04_bugs => 99_bugs}/34_dirname_off_by_one (100%) rename tests/custom/{04_bugs => 99_bugs}/35_vm_callframe_double_free (100%) rename tests/custom/{04_bugs => 99_bugs}/36_vm_nested_call_return (100%) rename 
tests/custom/{04_bugs => 99_bugs}/37_compiler_unexpected_unary_op (100%) rename tests/custom/{04_bugs => 99_bugs}/38_index_segfault (100%) rename tests/custom/{04_bugs => 99_bugs}/39_compiler_switch_continue_mismatch (100%) rename tests/custom/{04_bugs => 99_bugs}/40_lexer_bug_on_lstrip_off (100%) diff --git a/compiler.c b/compiler.c index 70d15d65..4878be1d 100644 --- a/compiler.c +++ b/compiler.c @@ -497,12 +497,23 @@ uc_compiler_set_u32(uc_compiler_t *compiler, size_t off, uint32_t n) } static size_t -uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) +uc_compiler_emit_constant_index(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) { size_t cidx = uc_program_add_constant(compiler->program, val); + uc_compiler_emit_u32(compiler, srcpos, cidx); + + return cidx; +} + +static size_t +uc_compiler_emit_constant(uc_compiler_t *compiler, size_t srcpos, uc_value_t *val) +{ + size_t cidx; + uc_compiler_emit_insn(compiler, srcpos, I_LOAD); - uc_compiler_emit_u32(compiler, 0, cidx); + + cidx = uc_compiler_emit_constant_index(compiler, srcpos, val); return cidx; } @@ -2924,13 +2935,516 @@ uc_compiler_compile_statement(uc_compiler_t *compiler) } static void -uc_compiler_compile_declaration(uc_compiler_t *compiler) +uc_compiler_export_add(uc_compiler_t *compiler, uc_value_t *name, ssize_t slot) +{ + uc_source_t *source = uc_compiler_current_source(compiler); + + if (!uc_source_export_add(source, name)) { + if (name) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate export '%s' for module '%s'", ucv_string_get(name), source->filename); + else + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate default export for module '%s'", source->filename); + } + else { + uc_compiler_emit_insn(compiler, 0, I_EXPORT); + uc_compiler_emit_u32(compiler, 0, slot); + } +} + +static void +uc_compiler_compile_exportlist(uc_compiler_t *compiler) +{ + uc_value_t *label, *name; + bool constant; + ssize_t 
slot; + + /* parse export symbols */ + do { + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + name = NULL; + + slot = uc_compiler_resolve_local(compiler, label, &constant); + + if (slot == -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Attempt to export undeclared or non-local variable '%s'", + ucv_string_get(label)); + } + + if (uc_compiler_parse_match(compiler, TK_AS)) { + if (uc_compiler_parse_match(compiler, TK_LABEL) || uc_compiler_parse_match(compiler, TK_STRING)) { + name = ucv_get(compiler->parser->prev.uv); + } + else if (!uc_compiler_parse_match(compiler, TK_DEFAULT)) { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting Label, String or 'default'"); + } + } + else { + name = ucv_get(label); + } + + uc_compiler_export_add(compiler, name, slot); + + ucv_put(label); + ucv_put(name); + + if (uc_compiler_parse_match(compiler, TK_RBRACE)) + break; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static void +uc_compiler_compile_export(uc_compiler_t *compiler) +{ + uc_locals_t *locals = &compiler->locals; + size_t off = locals->count; + uc_value_t *name; + ssize_t slot; + + if (compiler->program->sources.count == 1 || compiler->scope_depth) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Exports may only appear at top level of a module"); + + return; + } + + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_exportlist(compiler); + + return; + } + + if (uc_compiler_parse_match(compiler, TK_LOCAL)) + uc_compiler_compile_declexpr(compiler, false); + else if (uc_compiler_parse_match(compiler, TK_CONST)) + uc_compiler_compile_declexpr(compiler, true); + else if (uc_compiler_parse_match(compiler, TK_FUNC)) + uc_compiler_compile_funcdecl(compiler); + else if (uc_compiler_parse_match(compiler, TK_DEFAULT)) + 
uc_compiler_compile_expression(compiler); + else + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting 'let', 'const', 'function', 'default' or '{'"); + + if (off == locals->count) { + name = ucv_string_new("(module default export)"); + slot = uc_compiler_declare_local(compiler, name, true); + ucv_put(name); + + if (slot != -1) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Duplicate default export statement"); + else + uc_compiler_export_add(compiler, NULL, compiler->locals.count - 1); + } + else { + for (; off < locals->count; off++) + uc_compiler_export_add(compiler, locals->entries[off].name, off); + } + + uc_compiler_parse_consume(compiler, TK_SCOL); +} + +static uc_program_t * +uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, uc_program_t *prog, char **errp); + +static bool +uc_compiler_compile_module_source(uc_compiler_t *compiler, uc_source_t *source, uc_value_t *imports, char **errp) +{ + uc_parse_config_t config = { + .raw_mode = true, + .strict_declarations = true, + .module_search_path = compiler->parser->lex.config->module_search_path + }; + + size_t i, load_idx = 0, n_imports = 0; + bool loaded = false; + uc_value_t *import; + ssize_t slot; + + uc_program_function_foreach(compiler->program, fn) { + if (uc_program_function_source(fn) == source) { + loaded = true; + break; + } + } + + if (!loaded) { + load_idx = uc_program_function_id(compiler->program, + uc_program_function_last(compiler->program)) + 1; + + if (!uc_compile_from_source(&config, source, compiler->program, errp)) + return false; + + /* emit load, call & pop instructions */ + uc_compiler_emit_insn(compiler, compiler->parser->prev.pos, I_CLFN); + uc_compiler_emit_u32(compiler, 0, load_idx); + + uc_compiler_emit_insn(compiler, 0, I_CALL); + uc_compiler_emit_u32(compiler, 0, 0); + + uc_compiler_emit_insn(compiler, 0, I_POP); + } + + /* count imports, handle wildcard imports */ + for (i = 0; i < 
ucv_array_length(imports); i++) { + if (ucv_boolean_get(ucv_array_get(imports, i))) { + /* find index of first module export */ + slot = uc_program_export_lookup(compiler->program, source, source->exports.entries[0]); + + if (slot > 0xffff || source->exports.count > 0xffff) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Too many module exports"); + } + + /* emit import instruction... */ + uc_compiler_emit_insn(compiler, 0, I_IMPORT); + uc_compiler_emit_u32(compiler, 0, source->exports.count | (0xffff << 16)); + + /* ... followed by first module export offset ... */ + uc_compiler_emit_u16(compiler, 0, slot); + + /* ... and constant indexes for all exported names */ + for (load_idx = 0; load_idx < source->exports.count; load_idx++) { + if (source->exports.entries[load_idx]) + import = ucv_get(source->exports.entries[load_idx]); + else + import = ucv_string_new("default"); + + uc_compiler_emit_constant_index(compiler, 0, import); + ucv_put(import); + } + + } + else { + n_imports++; + } + } + + /* 0xffff is reserved for wildcard import */ + if (n_imports > 0xfffe) + uc_compiler_syntax_error(compiler, 0, "Too many imports"); + + /* emit non-wilcard import instructions */ + for (i = 0; i < ucv_array_length(imports); i++) { + import = ucv_array_get(imports, i); + + if (!ucv_boolean_get(import)) { + slot = uc_program_export_lookup(compiler->program, source, import); + + if (slot == -1) { + if (import) + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Module %s does not export '%s'", source->filename, ucv_string_get(import)); + else + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Module %s has no default export", source->filename); + } + else if (slot > 0xffff) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Too many module exports"); + } + else { + uc_compiler_emit_insn(compiler, 0, I_IMPORT); + uc_compiler_emit_u32(compiler, 0, slot | ((compiler->upvals.count - n_imports + i) << 16)); + } + 
} + } + + return true; +} + +static char * +uc_compiler_canonicalize_path(const char *path, const char *basedir) +{ + char *p, *resolved; + + if (*path == '/') + xasprintf(&p, "%s", path); + else if (basedir) + xasprintf(&p, "%s/%s", basedir, path); + else + xasprintf(&p, "./%s", path); + + resolved = realpath(p, NULL); + + free(p); + + return resolved; +} + +static char * +uc_compiler_expand_module_path(const char *name, const char *basedir, const char *template) +{ + int namelen, prefixlen; + char *path, *p; + + p = strchr(template, '*'); + + if (!p) + return NULL; + + prefixlen = p - template; + namelen = strlen(name); + + xasprintf(&path, "%.*s%.*s%s", prefixlen, template, namelen, name, p + 1); + + for (p = path + prefixlen; namelen > 0; namelen--, p++) + if (*p == '.') + *p = '/'; + + p = uc_compiler_canonicalize_path(path, basedir); + + free(path); + + return p; +} + +static char * +uc_compiler_resolve_module_path(uc_compiler_t *compiler, const char *name) +{ + uc_search_path_t *search = &compiler->parser->lex.config->module_search_path; + uc_source_t *source = uc_compiler_current_source(compiler); + char *path = NULL; + size_t i; + + if (strchr(name, '/')) + return uc_compiler_canonicalize_path(name, source->runpath); + + for (i = 0; i < search->count && !path; i++) + path = uc_compiler_expand_module_path(name, source->runpath, search->entries[i]); + + return path; +} + +static uc_source_t * +uc_compiler_acquire_source(uc_compiler_t *compiler, const char *path) +{ + size_t i; + + for (i = 0; i < compiler->program->sources.count; i++) + if (!strcmp(compiler->program->sources.entries[i]->filename, path)) + return uc_source_get(compiler->program->sources.entries[i]); + + return uc_source_new_file(path); +} + +static bool +uc_compiler_compile_module(uc_compiler_t *compiler, const char *name, uc_value_t *imports) +{ + uc_source_t *source; + char *path, *err; + bool res; + + if (!name) + return false; + + path = uc_compiler_resolve_module_path(compiler, name); + 
+ if (path) { + source = uc_compiler_acquire_source(compiler, path); + + if (source) { + err = NULL; + res = uc_compiler_compile_module_source(compiler, source, imports, &err); + + if (!res) + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to compile module '%s':\n%s", source->filename, err); + + free(err); + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to open module '%s': %s", + path, strerror(errno)); + + res = false; + } + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unable to resolve path for module '%s'", name); + + return false; + } + + uc_source_put(source); + free(path); + + return res; +} + +static void +uc_compiler_import_add(uc_compiler_t *compiler, uc_value_t *name) +{ + bool constant; + ssize_t slot; + + slot = uc_compiler_resolve_local(compiler, name, &constant); + + if (slot != -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Import name '%s' is already declared as local variable", + ucv_string_get(name)); + + return; + } + + slot = uc_compiler_resolve_upval(compiler, name, &constant); + + if (slot != -1) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Import name '%s' is already used", + ucv_string_get(name)); + + return; + } + + uc_compiler_add_upval(compiler, (2 << 14) + compiler->upvals.count, false, name, true); +} + +static void +uc_compiler_compile_importlist(uc_compiler_t *compiler, uc_value_t *namelist) { + uc_value_t *label, *name; + /* parse export symbols */ + do { + name = NULL; + label = NULL; + + if (uc_compiler_parse_match(compiler, TK_DEFAULT)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + } + else if (uc_compiler_parse_match(compiler, TK_STRING)) { + name = ucv_get(compiler->parser->prev.uv); + + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); 
+ + label = ucv_get(compiler->parser->prev.uv); + } + else if (uc_compiler_parse_match(compiler, TK_LABEL)) { + name = ucv_get(compiler->parser->prev.uv); + + if (uc_compiler_parse_match(compiler, TK_AS)) { + uc_compiler_parse_consume(compiler, TK_LABEL); + + label = ucv_get(compiler->parser->prev.uv); + } + else { + label = ucv_get(name); + } + } + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting Label, String or 'default'"); + } + + uc_compiler_import_add(compiler, label); + ucv_array_push(namelist, name); + ucv_put(label); + + if (uc_compiler_parse_match(compiler, TK_RBRACE)) + break; + } + while (uc_compiler_parse_match(compiler, TK_COMMA)); +} + +static void +uc_compiler_compile_import(uc_compiler_t *compiler) +{ + uc_value_t *namelist = ucv_array_new(NULL); + + if (compiler->scope_depth) { + uc_compiler_syntax_error(compiler, compiler->parser->prev.pos, + "Imports may only appear at top level"); + + return; + } + + /* import { ... } from */ + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_importlist(compiler, namelist); + uc_compiler_parse_consume(compiler, TK_FROM); + } + + /* import * as name from */ + else if (uc_compiler_parse_match(compiler, TK_MUL)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + uc_compiler_declare_local(compiler, compiler->parser->prev.uv, true); + uc_compiler_initialize_local(compiler); + ucv_array_push(namelist, ucv_boolean_new(true)); + + uc_compiler_parse_consume(compiler, TK_FROM); + } + + /* import defaultExport [, ... ] from */ + else if (uc_compiler_parse_match(compiler, TK_LABEL)) { + uc_compiler_import_add(compiler, compiler->parser->prev.uv); + ucv_array_push(namelist, NULL); + + /* import defaultExport, ... from */ + if (uc_compiler_parse_match(compiler, TK_COMMA)) { + /* import defaultExport, { ... 
} from */ + if (uc_compiler_parse_match(compiler, TK_LBRACE)) { + uc_compiler_compile_importlist(compiler, namelist); + } + + /* import defaultExport, * as name from */ + else if (uc_compiler_parse_match(compiler, TK_MUL)) { + uc_compiler_parse_consume(compiler, TK_AS); + uc_compiler_parse_consume(compiler, TK_LABEL); + + uc_compiler_declare_local(compiler, compiler->parser->prev.uv, true); + uc_compiler_initialize_local(compiler); + ucv_array_push(namelist, ucv_boolean_new(true)); + } + + /* error */ + else { + uc_compiler_syntax_error(compiler, compiler->parser->curr.pos, + "Unexpected token\nExpecting '{' or '*'"); + } + } + + uc_compiler_parse_consume(compiler, TK_FROM); + } + + uc_compiler_parse_consume(compiler, TK_STRING); + + uc_compiler_compile_module(compiler, ucv_string_get(compiler->parser->prev.uv), namelist); + + uc_compiler_parse_consume(compiler, TK_SCOL); + + ucv_put(namelist); +} + +static void +uc_compiler_compile_declaration(uc_compiler_t *compiler) +{ if (uc_compiler_parse_match(compiler, TK_LOCAL)) uc_compiler_compile_local(compiler); else if (uc_compiler_parse_match(compiler, TK_CONST)) uc_compiler_compile_const(compiler); + else if (uc_compiler_parse_match(compiler, TK_EXPORT)) + uc_compiler_compile_export(compiler); + else if (uc_compiler_parse_match(compiler, TK_IMPORT)) + uc_compiler_compile_import(compiler); else uc_compiler_compile_statement(compiler); @@ -2942,7 +3456,7 @@ uc_compiler_compile_declaration(uc_compiler_t *compiler) static uc_program_t * -uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **errp) +uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, uc_program_t *prog, char **errp) { #ifdef NO_COMPILE if (errp) @@ -2953,13 +3467,21 @@ uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **er uc_exprstack_t expr = { .token = TK_EOF }; uc_parser_t parser = { .config = config }; uc_compiler_t compiler = { .parser = &parser, .exprstack = &expr }; - uc_program_t 
*prog; + uc_program_t *progptr; uc_function_t *fn; + const char *name; - prog = uc_program_new(); + if (!prog) { + progptr = uc_program_new(); + name = "main"; + } + else { + progptr = prog; + name = "module"; + } uc_lexer_init(&parser.lex, config, source); - uc_compiler_init(&compiler, "main", source, 0, prog, + uc_compiler_init(&compiler, name, source, 0, progptr, config && config->strict_declarations); uc_compiler_parse_advance(&compiler); @@ -2980,12 +3502,13 @@ uc_compile_from_source(uc_parse_config_t *config, uc_source_t *source, char **er uc_lexer_free(&parser.lex); if (!fn) { - ucv_put(&prog->header); + if (progptr != prog) + ucv_put(&progptr->header); return NULL; } - return prog; + return progptr; #endif } @@ -3011,9 +3534,12 @@ uc_compile(uc_parse_config_t *config, uc_source_t *source, char **errp) { uc_program_t *prog = NULL; + if (!config) + config = &uc_default_parse_config; + switch (uc_source_type_test(source)) { case UC_SOURCE_TYPE_PLAIN: - prog = uc_compile_from_source(config, source, errp); + prog = uc_compile_from_source(config, source, NULL, errp); break; case UC_SOURCE_TYPE_PRECOMPILED: diff --git a/include/ucode/program.h b/include/ucode/program.h index 4fb4a9b2..c350fea3 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -46,6 +46,8 @@ uc_program_put(uc_program_t *prog) { fn = fn##_tmp, \ fn##_tmp = (uc_function_t *)fn##_tmp->progref.prev) +#define uc_program_function_last(prog) (uc_function_t *)prog->functions.next + uc_function_t *uc_program_function_new(uc_program_t *, const char *, uc_source_t *, size_t); size_t uc_program_function_id(uc_program_t *, uc_function_t *); uc_function_t *uc_program_function_load(uc_program_t *, size_t); diff --git a/tests/custom/04_modules/01_export_variable_declaration b/tests/custom/04_modules/01_export_variable_declaration new file mode 100644 index 00000000..19a1c11e --- /dev/null +++ b/tests/custom/04_modules/01_export_variable_declaration @@ -0,0 +1,29 @@ +Variable declarations can be 
prepended with `export` to automatically +export each variable using the same name as the variable itself. + +Updates to the variable after the export are reflected properly in +the including scope. + +-- File test-var-decl.uc -- +export let a, b, c; +export let d = 4, e = 5, f = 6; +export const g = 7, h = 8, i = 9; + +a = 1; +b = 2; +c = 3; +-- End -- + +-- Testcase -- +import { a, b, c, d, e, f, g, h, i } from "./files/test-var-decl.uc"; + +print([ a, b, c, d, e, f, g, h, i ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 1, 2, 3, 4, 5, 6, 7, 8, 9 ] +-- End -- diff --git a/tests/custom/04_modules/02_export_function_declaration b/tests/custom/04_modules/02_export_function_declaration new file mode 100644 index 00000000..4067da97 --- /dev/null +++ b/tests/custom/04_modules/02_export_function_declaration @@ -0,0 +1,22 @@ +A named function declaration can be prepended with `export` to +automatically export the function. + +-- File test-func-decl.uc -- +export function func() { + print("Hello, world!\n"); +}; +-- End -- + +-- Testcase -- +import { func } from "./files/test-func-decl.uc"; + +func(); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +Hello, world! +-- End -- diff --git a/tests/custom/04_modules/03_export_list b/tests/custom/04_modules/03_export_list new file mode 100644 index 00000000..8f93f087 --- /dev/null +++ b/tests/custom/04_modules/03_export_list @@ -0,0 +1,27 @@ +Already declared local variables and functions may be exported using the +curly brace export list syntax. + +-- File test-var-decl.uc -- +let testvar = 123; +const testconst = "Test"; + +function testfunc() { + print("Hello, world!\n"); +} + +export { testvar, testconst, testfunc }; +-- End -- + +-- Testcase -- +import { testvar, testconst, testfunc } from "./files/test-var-decl.uc"; + +print([ testvar, testconst, testfunc ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 123, "Test", "function testfunc() { ... 
}" ] +-- End -- diff --git a/tests/custom/04_modules/04_export_rename b/tests/custom/04_modules/04_export_rename new file mode 100644 index 00000000..49057fd6 --- /dev/null +++ b/tests/custom/04_modules/04_export_rename @@ -0,0 +1,28 @@ +By using the `as` keyword, exports may be renamed when using the export +list syntax. It is also possible to specify string aliases which are not +valid variable names; in this case a rename on import is mandatory. + +-- File test.uc -- +let testvar = 123; +const testconst = "Test"; + +function testfunc() { + print("Hello, world!\n"); +} + +export { testvar as modvar, testconst as 'define', testfunc as "module-function" }; +-- End -- + +-- Testcase -- +import { modvar, define, "module-function" as func } from "./files/test.uc"; + +print([ modvar, define, func ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 123, "Test", "function testfunc() { ... }" ] +-- End -- diff --git a/tests/custom/04_modules/05_export_default b/tests/custom/04_modules/05_export_default new file mode 100644 index 00000000..a4c8a437 --- /dev/null +++ b/tests/custom/04_modules/05_export_default @@ -0,0 +1,38 @@ +The `export default` statement can be used to declare a default export +value for a module. The value for `export default` can be an arbitrary +expression; it need not refer to a local variable. + +When using the export list syntax, the alias "default" can be used to +designate the default export.
+ +-- File test-default-expr.uc -- +export default 7 * 21; +-- End -- + +-- File test-default-func.uc -- +export default function() { + return "Hello, world!"; +}; +-- End -- + +-- File test-default-alias.uc -- +let a = 1, b = 2, c = 3; + +export { a, b as default, c }; +-- End -- + +-- Testcase -- +import def1 from "./files/test-default-expr.uc"; +import def2 from "./files/test-default-func.uc"; +import def3 from "./files/test-default-alias.uc"; + +print([ def1, def2(), def3 ], "\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 147, "Hello, world!", 2 ] +-- End -- diff --git a/tests/custom/04_modules/06_export_errors b/tests/custom/04_modules/06_export_errors new file mode 100644 index 00000000..c02a5475 --- /dev/null +++ b/tests/custom/04_modules/06_export_errors @@ -0,0 +1,89 @@ +Export statements are only allowed at the toplevel of a module. + +-- Testcase -- +export let x = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Exports may only appear at top level of a module + + `export let x = 1;` + ^-- Near here + + +-- End -- + + +Export statements are not allowed within functions or nested blocks. + +-- Testcase -- +import "./files/test.uc"; +-- End -- + +-- File test.uc -- +{ + export let x = 1; +} +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unable to compile module './files/test.uc': +Syntax error: Exports may only appear at top level of a module +In line 2, byte 2: + + ` export let x = 1;` + ^-- Near here + + + +In line 1, byte 25: + + `import "./files/test.uc";` + Near here --------------^ + + +-- End -- + + +Duplicate export names should result in an error. 
+ +-- Testcase -- +import "./files/test-duplicate.uc"; +-- End -- + +-- File test-duplicate.uc -- +let x = 1, y = 2; + +export { x }; +export { y as x }; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unable to compile module './files/test-duplicate.uc': +Syntax error: Duplicate export 'x' for module './files/test-duplicate.uc' +In line 4, byte 15: + + `export { y as x };` + Near here ----^ + + + +In line 1, byte 35: + + `import "./files/test-duplicate.uc";` + Near here ------------------------^ + + +-- End -- diff --git a/tests/custom/04_modules/07_import_default b/tests/custom/04_modules/07_import_default new file mode 100644 index 00000000..7190a22a --- /dev/null +++ b/tests/custom/04_modules/07_import_default @@ -0,0 +1,99 @@ +An `import` statement with a sole label will import the module's default +export and bind it to a local variable named after the label. + +-- Testcase -- +import defVal from "./files/test1.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test1.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the default export +-- End -- + + +Attempting to import a default export from a module without default +export will raise an error. + +-- Testcase -- +import defVal from "./files/test2.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test2.uc -- +export const x = "This is a non-default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Module ./files/test2.uc has no default export +In line 1, byte 20: + + `import defVal from "./files/test2.uc";` + Near here ---------^ + + +-- End -- + + +In import statements using the list syntax, the `default` keyword can be +used to refer to default exports. 
+ +-- Testcase -- +import { default as defVal } from "./files/test3.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test3.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the default export +-- End -- + + +When using the default keyword within the list syntax, the `as` keyword is +mandatory to assign a non-reserved keyword as name. + +-- Testcase -- +import { default } from "./files/test4.uc"; + +print(defVal, "\n"); +-- End -- + +-- File test4.uc -- +export default "This is the default export"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting 'as' +In line 1, byte 18: + + `import { default } from "./files/test4.uc";` + Near here -------^ + + +-- End -- diff --git a/tests/custom/04_modules/08_import_list b/tests/custom/04_modules/08_import_list new file mode 100644 index 00000000..1a4f116e --- /dev/null +++ b/tests/custom/04_modules/08_import_list @@ -0,0 +1,105 @@ +An `import` statement followed by a curly brace enclosed list of names +will import the corresponding exports from the module. + +-- Testcase -- +import { a, b, c } from "./files/test1.uc"; + +print([ a, b, c ], "\n"); +-- End -- + +-- File test1.uc -- +export const a = 1, b = 2, c = 3; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 1, 2, 3 ] +-- End -- + + +Attempting to import a non-exported name will raise an error. + +-- Testcase -- +import y from "./files/test2.uc"; + +print(y, "\n"); +-- End -- + +-- File test2.uc -- +export const x = "This is a test"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Module ./files/test2.uc has no default export +In line 1, byte 15: + + `import y from "./files/test2.uc";` + Near here ----^ + + +-- End -- + + +Imports may be renamed to assign an alternative local name to the +exported module symbols. 
Renaming is also required for string export +names which are not valid variable identifiers. + +-- Testcase -- +import { a as var1, bool as var2, "my function" as var3 } from "./files/test3.uc"; + +print([ var1, var2, var3 ], "\n"); +-- End -- + +-- File test3.uc -- +const a = "A string"; + +let b = 123; + +function c() { + return "A function" +} + +export { + a, + b as bool, + c as "my function" +}; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ "A string", 123, "function c() { ... }" ] +-- End -- + + +A list expression may follow a default import expression in an `import` +statement. + +-- Testcase -- +import defVal, { a as x, b as y, c as z } from "./files/test4.uc"; + +print([defVal, x, y, z], "\n"); +-- End -- + +-- File test4.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 6, 1, 2, 3 ] +-- End -- diff --git a/tests/custom/04_modules/09_import_wildcard b/tests/custom/04_modules/09_import_wildcard new file mode 100644 index 00000000..aa3dc820 --- /dev/null +++ b/tests/custom/04_modules/09_import_wildcard @@ -0,0 +1,73 @@ +By specifying `*` instead of a label or an import list after an `import` +keyword, all of the module's exports are aggregated into an object whose +keys and values refer to the exported names and their corresponding +values respectively. + +-- Testcase -- +import * as mod from "./files/test1.uc"; + +print(mod, "\n"); +-- End -- + +-- File test1.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +{ "a": 1, "b": 2, "c": 3, "default": 6 } +-- End -- + + +When using the wildcard import syntax, assigning a name using the `as` +expression is mandatory. 
+ +-- Testcase -- +import * from "./files/test2.uc"; +-- End -- + +-- File test2.uc -- +export const x = "This is a test"; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Unexpected token +Expecting 'as' +In line 1, byte 10: + + `import * from "./files/test2.uc";` + ^-- Near here + + +-- End -- + + +A wildcard expression may follow a default import expression in an `import` +statement. + +-- Testcase -- +import defVal, * as mod from "./files/test3.uc"; + +print([defVal, mod], "\n"); +-- End -- + +-- File test3.uc -- +export const a = 1, b = 2, c = 3; +export default a + b + c; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +[ 6, { "a": 1, "b": 2, "c": 3, "default": 6 } ] +-- End -- diff --git a/tests/custom/04_modules/10_import_none b/tests/custom/04_modules/10_import_none new file mode 100644 index 00000000..be301069 --- /dev/null +++ b/tests/custom/04_modules/10_import_none @@ -0,0 +1,18 @@ +An `import` statement may omit a default name, wildcard expression or name +list entirely to execute a module's code solely for its side effects. + +-- Testcase -- +import "./files/test.uc"; +-- End -- + +-- File test.uc -- +print("This is the test module running\n"); +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +This is the test module running +-- End -- diff --git a/tests/custom/04_modules/11_import_many_exec_once b/tests/custom/04_modules/11_import_many_exec_once new file mode 100644 index 00000000..f469c7fe --- /dev/null +++ b/tests/custom/04_modules/11_import_many_exec_once @@ -0,0 +1,28 @@ +When multiple imports refer to the same module, the module will only be +executed once. The equivalence of module paths is tested after canonicalizing +the requested path. 
+ +-- Testcase -- +import { counter as counter1 } from "./files/test/example.uc"; +import { counter as counter2 } from "files/test/example.uc"; +import { counter as counter3 } from "test.example"; + +print([ counter1, counter2, counter3 ], "\n"); +-- End -- + +-- File test/example.uc -- +print("This is the test module running\n"); + +export let counter = 0; + +counter++; +-- End -- + +-- Args -- +-R -L ./files +-- End -- + +-- Expect stdout -- +This is the test module running +[ 1, 1, 1 ] +-- End -- diff --git a/tests/custom/04_modules/12_import_immutability b/tests/custom/04_modules/12_import_immutability new file mode 100644 index 00000000..37c0bc6a --- /dev/null +++ b/tests/custom/04_modules/12_import_immutability @@ -0,0 +1,52 @@ +Module imports are read-only bindings to the exported module variables. + +-- Testcase -- +import { a } from "./files/test.uc"; + +a = 2; +-- End -- + +-- File test.uc -- +export let a = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Syntax error: Invalid assignment to constant 'a' +In line 3, byte 5: + + `a = 2;` + ^-- Near here + + +-- End -- + + +Aggregated module objects are read-only as well. + +-- Testcase -- +import * as mod from "./files/test.uc"; + +mod.a = 2; +-- End -- + +-- File test.uc -- +export let a = 1; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stderr -- +Type error: object value is immutable +In line 3, byte 9: + + `mod.a = 2;` + ^-- Near here + + +-- End -- diff --git a/tests/custom/04_modules/13_import_liveness b/tests/custom/04_modules/13_import_liveness new file mode 100644 index 00000000..ca7ff356 --- /dev/null +++ b/tests/custom/04_modules/13_import_liveness @@ -0,0 +1,29 @@ +Imported bindings to exported module variables are live, they'll reflect +every change to the exported variable values. 
+ +-- Testcase -- +import { counter, count } from "./files/test.uc"; + +print(counter, "\n"); +count(); +print(counter, "\n"); +-- End -- + +-- File test.uc -- +let counter = 1; + +function count() { + counter++; +} + +export { counter, count }; +-- End -- + +-- Args -- +-R +-- End -- + +-- Expect stdout -- +1 +2 +-- End -- diff --git a/tests/custom/04_bugs/01_try_catch_stack_mismatch b/tests/custom/99_bugs/01_try_catch_stack_mismatch similarity index 100% rename from tests/custom/04_bugs/01_try_catch_stack_mismatch rename to tests/custom/99_bugs/01_try_catch_stack_mismatch diff --git a/tests/custom/04_bugs/02_array_pop_use_after_free b/tests/custom/99_bugs/02_array_pop_use_after_free similarity index 100% rename from tests/custom/04_bugs/02_array_pop_use_after_free rename to tests/custom/99_bugs/02_array_pop_use_after_free diff --git a/tests/custom/04_bugs/03_switch_fallthrough_miscompilation b/tests/custom/99_bugs/03_switch_fallthrough_miscompilation similarity index 100% rename from tests/custom/04_bugs/03_switch_fallthrough_miscompilation rename to tests/custom/99_bugs/03_switch_fallthrough_miscompilation diff --git a/tests/custom/04_bugs/04_property_set_abort b/tests/custom/99_bugs/04_property_set_abort similarity index 100% rename from tests/custom/04_bugs/04_property_set_abort rename to tests/custom/99_bugs/04_property_set_abort diff --git a/tests/custom/04_bugs/05_duplicate_resource_type b/tests/custom/99_bugs/05_duplicate_resource_type similarity index 100% rename from tests/custom/04_bugs/05_duplicate_resource_type rename to tests/custom/99_bugs/05_duplicate_resource_type diff --git a/tests/custom/04_bugs/06_lexer_escape_at_boundary b/tests/custom/99_bugs/06_lexer_escape_at_boundary similarity index 100% rename from tests/custom/04_bugs/06_lexer_escape_at_boundary rename to tests/custom/99_bugs/06_lexer_escape_at_boundary diff --git a/tests/custom/04_bugs/07_lexer_overlong_lines b/tests/custom/99_bugs/07_lexer_overlong_lines similarity index 100% rename 
from tests/custom/04_bugs/07_lexer_overlong_lines rename to tests/custom/99_bugs/07_lexer_overlong_lines diff --git a/tests/custom/04_bugs/08_compiler_arrow_fn_expressions b/tests/custom/99_bugs/08_compiler_arrow_fn_expressions similarity index 100% rename from tests/custom/04_bugs/08_compiler_arrow_fn_expressions rename to tests/custom/99_bugs/08_compiler_arrow_fn_expressions diff --git a/tests/custom/04_bugs/09_reject_invalid_array_indexes b/tests/custom/99_bugs/09_reject_invalid_array_indexes similarity index 100% rename from tests/custom/04_bugs/09_reject_invalid_array_indexes rename to tests/custom/99_bugs/09_reject_invalid_array_indexes diff --git a/tests/custom/04_bugs/10_break_stack_mismatch b/tests/custom/99_bugs/10_break_stack_mismatch similarity index 100% rename from tests/custom/04_bugs/10_break_stack_mismatch rename to tests/custom/99_bugs/10_break_stack_mismatch diff --git a/tests/custom/04_bugs/11_switch_stack_mismatch b/tests/custom/99_bugs/11_switch_stack_mismatch similarity index 100% rename from tests/custom/04_bugs/11_switch_stack_mismatch rename to tests/custom/99_bugs/11_switch_stack_mismatch diff --git a/tests/custom/04_bugs/12_altblock_stack_mismatch b/tests/custom/99_bugs/12_altblock_stack_mismatch similarity index 100% rename from tests/custom/04_bugs/12_altblock_stack_mismatch rename to tests/custom/99_bugs/12_altblock_stack_mismatch diff --git a/tests/custom/04_bugs/13_split_by_string_leading_trailing b/tests/custom/99_bugs/13_split_by_string_leading_trailing similarity index 100% rename from tests/custom/04_bugs/13_split_by_string_leading_trailing rename to tests/custom/99_bugs/13_split_by_string_leading_trailing diff --git a/tests/custom/04_bugs/14_incomplete_expression_at_eof b/tests/custom/99_bugs/14_incomplete_expression_at_eof similarity index 100% rename from tests/custom/04_bugs/14_incomplete_expression_at_eof rename to tests/custom/99_bugs/14_incomplete_expression_at_eof diff --git 
a/tests/custom/04_bugs/15_segfault_on_prefix_increment b/tests/custom/99_bugs/15_segfault_on_prefix_increment similarity index 100% rename from tests/custom/04_bugs/15_segfault_on_prefix_increment rename to tests/custom/99_bugs/15_segfault_on_prefix_increment diff --git a/tests/custom/04_bugs/16_hang_on_regexp_at_eof b/tests/custom/99_bugs/16_hang_on_regexp_at_eof similarity index 100% rename from tests/custom/04_bugs/16_hang_on_regexp_at_eof rename to tests/custom/99_bugs/16_hang_on_regexp_at_eof diff --git a/tests/custom/04_bugs/17_hang_on_unclosed_expression_block b/tests/custom/99_bugs/17_hang_on_unclosed_expression_block similarity index 100% rename from tests/custom/04_bugs/17_hang_on_unclosed_expression_block rename to tests/custom/99_bugs/17_hang_on_unclosed_expression_block diff --git a/tests/custom/04_bugs/18_hang_on_line_comments_at_eof b/tests/custom/99_bugs/18_hang_on_line_comments_at_eof similarity index 100% rename from tests/custom/04_bugs/18_hang_on_line_comments_at_eof rename to tests/custom/99_bugs/18_hang_on_line_comments_at_eof diff --git a/tests/custom/04_bugs/19_truncated_format_string b/tests/custom/99_bugs/19_truncated_format_string similarity index 100% rename from tests/custom/04_bugs/19_truncated_format_string rename to tests/custom/99_bugs/19_truncated_format_string diff --git a/tests/custom/04_bugs/20_use_strict_stack_mismatch b/tests/custom/99_bugs/20_use_strict_stack_mismatch similarity index 100% rename from tests/custom/04_bugs/20_use_strict_stack_mismatch rename to tests/custom/99_bugs/20_use_strict_stack_mismatch diff --git a/tests/custom/04_bugs/21_compiler_parenthesized_prop_keyword b/tests/custom/99_bugs/21_compiler_parenthesized_prop_keyword similarity index 100% rename from tests/custom/04_bugs/21_compiler_parenthesized_prop_keyword rename to tests/custom/99_bugs/21_compiler_parenthesized_prop_keyword diff --git a/tests/custom/04_bugs/22_compiler_break_continue_scoping 
b/tests/custom/99_bugs/22_compiler_break_continue_scoping similarity index 100% rename from tests/custom/04_bugs/22_compiler_break_continue_scoping rename to tests/custom/99_bugs/22_compiler_break_continue_scoping diff --git a/tests/custom/04_bugs/23_compiler_parenthesized_division b/tests/custom/99_bugs/23_compiler_parenthesized_division similarity index 100% rename from tests/custom/04_bugs/23_compiler_parenthesized_division rename to tests/custom/99_bugs/23_compiler_parenthesized_division diff --git a/tests/custom/04_bugs/24_compiler_local_for_loop_declaration b/tests/custom/99_bugs/24_compiler_local_for_loop_declaration similarity index 100% rename from tests/custom/04_bugs/24_compiler_local_for_loop_declaration rename to tests/custom/99_bugs/24_compiler_local_for_loop_declaration diff --git a/tests/custom/04_bugs/25_lexer_shifted_offsets b/tests/custom/99_bugs/25_lexer_shifted_offsets similarity index 100% rename from tests/custom/04_bugs/25_lexer_shifted_offsets rename to tests/custom/99_bugs/25_lexer_shifted_offsets diff --git a/tests/custom/04_bugs/26_compiler_jmp_to_zero b/tests/custom/99_bugs/26_compiler_jmp_to_zero similarity index 100% rename from tests/custom/04_bugs/26_compiler_jmp_to_zero rename to tests/custom/99_bugs/26_compiler_jmp_to_zero diff --git a/tests/custom/04_bugs/27_invalid_sparse_array_set b/tests/custom/99_bugs/27_invalid_sparse_array_set similarity index 100% rename from tests/custom/04_bugs/27_invalid_sparse_array_set rename to tests/custom/99_bugs/27_invalid_sparse_array_set diff --git a/tests/custom/04_bugs/28_null_equality b/tests/custom/99_bugs/28_null_equality similarity index 100% rename from tests/custom/04_bugs/28_null_equality rename to tests/custom/99_bugs/28_null_equality diff --git a/tests/custom/04_bugs/29_empty_string_as_number b/tests/custom/99_bugs/29_empty_string_as_number similarity index 100% rename from tests/custom/04_bugs/29_empty_string_as_number rename to tests/custom/99_bugs/29_empty_string_as_number diff 
--git a/tests/custom/04_bugs/30_nan_strict_equality b/tests/custom/99_bugs/30_nan_strict_equality similarity index 100% rename from tests/custom/04_bugs/30_nan_strict_equality rename to tests/custom/99_bugs/30_nan_strict_equality diff --git a/tests/custom/04_bugs/31_vallist_8bit_shortstrings b/tests/custom/99_bugs/31_vallist_8bit_shortstrings similarity index 100% rename from tests/custom/04_bugs/31_vallist_8bit_shortstrings rename to tests/custom/99_bugs/31_vallist_8bit_shortstrings diff --git a/tests/custom/04_bugs/32_compiler_switch_patchlist_corruption b/tests/custom/99_bugs/32_compiler_switch_patchlist_corruption similarity index 100% rename from tests/custom/04_bugs/32_compiler_switch_patchlist_corruption rename to tests/custom/99_bugs/32_compiler_switch_patchlist_corruption diff --git a/tests/custom/04_bugs/33_vm_computed_prop_decl_crash b/tests/custom/99_bugs/33_vm_computed_prop_decl_crash similarity index 100% rename from tests/custom/04_bugs/33_vm_computed_prop_decl_crash rename to tests/custom/99_bugs/33_vm_computed_prop_decl_crash diff --git a/tests/custom/04_bugs/34_dirname_off_by_one b/tests/custom/99_bugs/34_dirname_off_by_one similarity index 100% rename from tests/custom/04_bugs/34_dirname_off_by_one rename to tests/custom/99_bugs/34_dirname_off_by_one diff --git a/tests/custom/04_bugs/35_vm_callframe_double_free b/tests/custom/99_bugs/35_vm_callframe_double_free similarity index 100% rename from tests/custom/04_bugs/35_vm_callframe_double_free rename to tests/custom/99_bugs/35_vm_callframe_double_free diff --git a/tests/custom/04_bugs/36_vm_nested_call_return b/tests/custom/99_bugs/36_vm_nested_call_return similarity index 100% rename from tests/custom/04_bugs/36_vm_nested_call_return rename to tests/custom/99_bugs/36_vm_nested_call_return diff --git a/tests/custom/04_bugs/37_compiler_unexpected_unary_op b/tests/custom/99_bugs/37_compiler_unexpected_unary_op similarity index 100% rename from tests/custom/04_bugs/37_compiler_unexpected_unary_op 
rename to tests/custom/99_bugs/37_compiler_unexpected_unary_op diff --git a/tests/custom/04_bugs/38_index_segfault b/tests/custom/99_bugs/38_index_segfault similarity index 100% rename from tests/custom/04_bugs/38_index_segfault rename to tests/custom/99_bugs/38_index_segfault diff --git a/tests/custom/04_bugs/39_compiler_switch_continue_mismatch b/tests/custom/99_bugs/39_compiler_switch_continue_mismatch similarity index 100% rename from tests/custom/04_bugs/39_compiler_switch_continue_mismatch rename to tests/custom/99_bugs/39_compiler_switch_continue_mismatch diff --git a/tests/custom/04_bugs/40_lexer_bug_on_lstrip_off b/tests/custom/99_bugs/40_lexer_bug_on_lstrip_off similarity index 100% rename from tests/custom/04_bugs/40_lexer_bug_on_lstrip_off rename to tests/custom/99_bugs/40_lexer_bug_on_lstrip_off From 156d584e4d0af46c39234ee68a98a16ab4cbe225 Mon Sep 17 00:00:00 2001 From: Jo-Philipp Wich Date: Thu, 28 Jul 2022 14:09:57 +0200 Subject: [PATCH 21/21] treewide: unexport libucode internal functions Trim down the libucode.so size somewhat by marking purely internal, non-public API functions hidden. 
Signed-off-by: Jo-Philipp Wich --- include/ucode/chunk.h | 14 +++++++------- include/ucode/lexer.h | 13 ++++++------- include/ucode/lib.h | 4 ++-- include/ucode/program.h | 18 +++++++++--------- include/ucode/source.h | 14 +++++++------- include/ucode/types.h | 11 +++++------ include/ucode/util.h | 5 +++++ include/ucode/vallist.h | 14 +++++++------- 8 files changed, 48 insertions(+), 45 deletions(-) diff --git a/include/ucode/chunk.h b/include/ucode/chunk.h index 78d5ec6e..a5f0b1cf 100644 --- a/include/ucode/chunk.h +++ b/include/ucode/chunk.h @@ -24,14 +24,14 @@ #include "util.h" #include "types.h" -void uc_chunk_init(uc_chunk_t *chunk); -void uc_chunk_free(uc_chunk_t *chunk); -size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); +__hidden void uc_chunk_init(uc_chunk_t *chunk); +__hidden void uc_chunk_free(uc_chunk_t *chunk); +__hidden size_t uc_chunk_add(uc_chunk_t *chunk, uint8_t byte, size_t line); -void uc_chunk_pop(uc_chunk_t *chunk); +__hidden void uc_chunk_pop(uc_chunk_t *chunk); -size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); -void uc_chunk_debug_add_variable(uc_chunk_t *chunk, size_t from, size_t to, size_t slot, bool upval, uc_value_t *name); -uc_value_t *uc_chunk_debug_get_variable(uc_chunk_t *chunk, size_t off, size_t slot, bool upval); +__hidden size_t uc_chunk_debug_get_srcpos(uc_chunk_t *chunk, size_t off); +__hidden void uc_chunk_debug_add_variable(uc_chunk_t *chunk, size_t from, size_t to, size_t slot, bool upval, uc_value_t *name); +__hidden uc_value_t *uc_chunk_debug_get_variable(uc_chunk_t *chunk, size_t off, size_t slot, bool upval); #endif /* UCODE_CHUNK_H */ diff --git a/include/ucode/lexer.h b/include/ucode/lexer.h index e3aba8ed..c013aac6 100644 --- a/include/ucode/lexer.h +++ b/include/ucode/lexer.h @@ -176,16 +176,15 @@ typedef struct { } uc_lexer_t; -void uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source); -void uc_lexer_free(uc_lexer_t *lex); +__hidden void 
uc_lexer_init(uc_lexer_t *lex, uc_parse_config_t *config, uc_source_t *source); +__hidden void uc_lexer_free(uc_lexer_t *lex); -uc_token_t *uc_lexer_next_token(uc_lexer_t *lex); +__hidden uc_token_t *uc_lexer_next_token(uc_lexer_t *lex); -bool uc_lexer_is_keyword(uc_value_t *label); +__hidden bool uc_lexer_is_keyword(uc_value_t *label); -bool utf8enc(char **out, int *rem, int code); +__hidden bool utf8enc(char **out, int *rem, int code); -const char * -uc_tokenname(unsigned type); +__hidden const char *uc_tokenname(unsigned type); #endif /* UCODE_LEXER_H */ diff --git a/include/ucode/lib.h b/include/ucode/lib.h index a80844db..4c7a3b0c 100644 --- a/include/ucode/lib.h +++ b/include/ucode/lib.h @@ -31,8 +31,8 @@ extern const uc_function_list_t uc_stdlib_functions[]; void uc_stdlib_load(uc_value_t *scope); uc_cfn_ptr_t uc_stdlib_function(const char *name); -bool uc_source_context_format(uc_stringbuf_t *buf, uc_source_t *src, size_t off, bool compact); -bool uc_error_context_format(uc_stringbuf_t *buf, uc_source_t *src, uc_value_t *stacktrace, size_t off); +__hidden bool uc_source_context_format(uc_stringbuf_t *buf, uc_source_t *src, size_t off, bool compact); +__hidden bool uc_error_context_format(uc_stringbuf_t *buf, uc_source_t *src, uc_value_t *stacktrace, size_t off); /* vm helper */ diff --git a/include/ucode/program.h b/include/ucode/program.h index c350fea3..9014ae43 100644 --- a/include/ucode/program.h +++ b/include/ucode/program.h @@ -48,17 +48,17 @@ uc_program_put(uc_program_t *prog) { #define uc_program_function_last(prog) (uc_function_t *)prog->functions.next -uc_function_t *uc_program_function_new(uc_program_t *, const char *, uc_source_t *, size_t); -size_t uc_program_function_id(uc_program_t *, uc_function_t *); -uc_function_t *uc_program_function_load(uc_program_t *, size_t); -uc_source_t *uc_program_function_source(uc_function_t *); -size_t uc_program_function_srcpos(uc_function_t *, size_t); -void uc_program_function_free(uc_function_t *); +__hidden 
uc_function_t *uc_program_function_new(uc_program_t *, const char *, uc_source_t *, size_t); +__hidden size_t uc_program_function_id(uc_program_t *, uc_function_t *); +__hidden uc_function_t *uc_program_function_load(uc_program_t *, size_t); +__hidden uc_source_t *uc_program_function_source(uc_function_t *); +__hidden size_t uc_program_function_srcpos(uc_function_t *, size_t); +__hidden void uc_program_function_free(uc_function_t *); -ssize_t uc_program_export_lookup(uc_program_t *, uc_source_t *, uc_value_t *); +__hidden ssize_t uc_program_export_lookup(uc_program_t *, uc_source_t *, uc_value_t *); -uc_value_t *uc_program_get_constant(uc_program_t *, size_t); -ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); +__hidden uc_value_t *uc_program_get_constant(uc_program_t *, size_t); +__hidden ssize_t uc_program_add_constant(uc_program_t *, uc_value_t *); void uc_program_write(uc_program_t *, FILE *, bool); uc_program_t *uc_program_load(uc_source_t *, char **); diff --git a/include/ucode/source.h b/include/ucode/source.h index b3eaa343..e1fd211f 100644 --- a/include/ucode/source.h +++ b/include/ucode/source.h @@ -35,7 +35,7 @@ typedef enum { uc_source_t *uc_source_new_file(const char *path); uc_source_t *uc_source_new_buffer(const char *name, char *buf, size_t len); -size_t uc_source_get_line(uc_source_t *source, size_t *offset); +__hidden size_t uc_source_get_line(uc_source_t *source, size_t *offset); static inline uc_source_t * uc_source_get(uc_source_t *source) { @@ -47,14 +47,14 @@ uc_source_put(uc_source_t *source) { ucv_put(source ? 
&source->header : NULL); } -uc_source_type_t uc_source_type_test(uc_source_t *source); +__hidden uc_source_type_t uc_source_type_test(uc_source_t *source); -void uc_source_line_next(uc_source_t *source); -void uc_source_line_update(uc_source_t *source, size_t off); +__hidden void uc_source_line_next(uc_source_t *source); +__hidden void uc_source_line_update(uc_source_t *source, size_t off); -void uc_source_runpath_set(uc_source_t *source, const char *runpath); +__hidden void uc_source_runpath_set(uc_source_t *source, const char *runpath); -bool uc_source_export_add(uc_source_t *source, uc_value_t *name); -ssize_t uc_source_export_lookup(uc_source_t *source, uc_value_t *name); +__hidden bool uc_source_export_add(uc_source_t *source, uc_value_t *name); +__hidden ssize_t uc_source_export_lookup(uc_source_t *source, uc_value_t *name); #endif /* UCODE_SOURCE_H */ diff --git a/include/ucode/types.h b/include/ucode/types.h index c32829f2..0b63501d 100644 --- a/include/ucode/types.h +++ b/include/ucode/types.h @@ -310,13 +310,12 @@ struct uc_vm { /* Value API */ -void ucv_free(uc_value_t *, bool); -void ucv_put(uc_value_t *); - -void ucv_unref(uc_weakref_t *); -void ucv_ref(uc_weakref_t *, uc_weakref_t *); +__hidden void ucv_free(uc_value_t *, bool); +__hidden void ucv_unref(uc_weakref_t *); +__hidden void ucv_ref(uc_weakref_t *, uc_weakref_t *); uc_value_t *ucv_get(uc_value_t *uv); +void ucv_put(uc_value_t *); uc_type_t ucv_type(uc_value_t *); const char *ucv_typename(uc_value_t *); @@ -547,6 +546,6 @@ ucv_clear_mark(uc_value_t *uv) void ucv_gc(uc_vm_t *); -void ucv_freeall(uc_vm_t *); +__hidden void ucv_freeall(uc_vm_t *); #endif /* UCODE_TYPES_H */ diff --git a/include/ucode/util.h b/include/ucode/util.h index 093951e2..52303cc3 100644 --- a/include/ucode/util.h +++ b/include/ucode/util.h @@ -26,6 +26,11 @@ #include +#ifndef __hidden +#define __hidden __attribute__((visibility("hidden"))) +#endif + + /* alignment & array size */ #ifndef ALIGN diff --git 
a/include/ucode/vallist.h b/include/ucode/vallist.h index 3dc57201..78b9a3ee 100644 --- a/include/ucode/vallist.h +++ b/include/ucode/vallist.h @@ -33,17 +33,17 @@ typedef enum { TAG_LSTR = 5 } uc_value_type_t; -uc_value_t *uc_number_parse(const char *buf, char **end); -uc_value_t *uc_number_parse_octal(const char *buf, char **end); +__hidden uc_value_t *uc_number_parse(const char *buf, char **end); +__hidden uc_value_t *uc_number_parse_octal(const char *buf, char **end); bool uc_double_pack(double d, char *buf, bool little_endian); double uc_double_unpack(const char *buf, bool little_endian); -void uc_vallist_init(uc_value_list_t *list); -void uc_vallist_free(uc_value_list_t *list); +__hidden void uc_vallist_init(uc_value_list_t *list); +__hidden void uc_vallist_free(uc_value_list_t *list); -ssize_t uc_vallist_add(uc_value_list_t *list, uc_value_t *value); -uc_value_type_t uc_vallist_type(uc_value_list_t *list, size_t idx); -uc_value_t *uc_vallist_get(uc_value_list_t *list, size_t idx); +__hidden ssize_t uc_vallist_add(uc_value_list_t *list, uc_value_t *value); +__hidden uc_value_type_t uc_vallist_type(uc_value_list_t *list, size_t idx); +__hidden uc_value_t *uc_vallist_get(uc_value_list_t *list, size_t idx); #endif /* UCODE_VALUE_H */