diff --git a/Makefile b/Makefile index 6b67f54d2..ccd3468a3 100644 --- a/Makefile +++ b/Makefile @@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua \ dis_mips64r6.lua dis_mips64r6el.lua \ + dis_riscv.lua dis_riscv64.lua \ vmdef.lua ifeq (,$(findstring Windows,$(OS))) diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h new file mode 100644 index 000000000..b2739fdbb --- /dev/null +++ b/dynasm/dasm_riscv.h @@ -0,0 +1,435 @@ +/* +** DynASM RISC-V encoding engine. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "riscv" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). 
*/ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals. */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); +} + +/* Free DynASM state. 
*/ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl; + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imms(int n) +{ + return (n >= -2048 && n < 2048) ? n : 4096; +} +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. 
*/ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 20); + if (action >= DASM__MAX || (ins & 0xf)) { + ofs += 4; + } else { + ins >>= 4; + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. 
*/ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMS: +#ifdef DASM_CHECKS + CK(dasm_imms(n) != 4096, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). 
*/ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 20); + if (ins & 0xf) continue; else ins >>= 4; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + if (ins & 0xf) { *cp++ = ins; continue; } + unsigned int action = (ins >> 20); + unsigned int val = (ins >> 4); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? 
*b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1); + goto patchrel; + case DASM_ALIGN: + val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; + patchrel: + if (val & 2048) { /* B */ + CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL); + cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000) + | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080); + } else { /* J */ + CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000) + | ((n << 9) & 0x00100000) | (n & 0x000ff000); + } + break; + case DASM_LABEL_LG: + val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31); + break; + case DASM_IMMS: + cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80)); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua new file mode 100644 index 000000000..4c8518f16 --- /dev/null +++ b/dynasm/dasm_riscv.lua @@ -0,0 +1,979 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +-- +-- Contributed by gns from PLCT Lab, ISCAS. +------------------------------------------------------------------------------ + +local riscv32 = riscv32 +local riscv64 = riscv64 + +-- Module information: +local _info = { + arch = riscv32 and "riscv32" or riscv64 and "riscv64", + description = "DynASM RISC-V module", + version = "1.5.0", + vernum = 10500, + release = "2022-07-12", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. 
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable = assert, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch = _s.match, _s.gmatch
local concat, sort = table.concat, table.sort
local bit = bit or require("bit")
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local tohex = bit.tohex

-- Collect all keys of t into a sorted array.
local function __orderedIndexGen(t)
  local keys = {}
  for key in pairs(t) do
    keys[#keys+1] = key
  end
  sort(keys)
  return keys
end

-- Stateless iterator step for opairs(). The sorted key list is cached in
-- t.__orderedIndex for the duration of the traversal and removed at its end.
local function __orderedNext(t, state)
  local key
  if state == nil then
    -- First step: build the key list and start at its head.
    local keys = __orderedIndexGen(t)
    t.__orderedIndex = keys
    key = keys[1]
  else
    -- Later steps: the successor of the previous key is next.
    local keys = t.__orderedIndex
    for i = 1, #keys do
      if keys[i] == state then
        key = keys[i+1]
        break
      end
    end
  end

  if key then
    return key, t[key]
  end

  -- Exhausted: drop the cache so it does not linger as a table entry.
  t.__orderedIndex = nil
  return
end

-- Like pairs(), but visits the keys in sorted order.
local function opairs(t)
  return __orderedNext, t, nil
end

-- Inherited tables and callbacks.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMMS",
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

------------------------------------------------------------------------------

-- Dump action names and numbers.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- An empty action list is emitted as a single STOP word so the generated
-- array is never zero-sized.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  -- The fallback used to be stored in actlist[0], a slot the code below
  -- never reads, so an empty list ended in tohex(nil).
  if nn == 0 then nn = 1; last = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end

------------------------------------------------------------------------------

-- Add word to action list.
local function wputxw(n)
  assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
-- The word is action number * 2^20 + val * 16, which leaves the low nibble
-- zero. `a` (if given) becomes a dasm_put() argument; `num` overrides the
-- number of buffer positions consumed.
local function waction(action, val, a, num)
  local w = assert(map_action[action], "bad action name `"..action.."'")
  wputxw(w * 0x100000 + (val or 0) * 16)
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Flush action list (intervening C code or buffer pos overflow).
local function wflush(term)
  if #actlist == actargs[1] then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped word.
-- A word whose low nibble is zero would be read as an action, so it is
-- prefixed with ESC.
local function wputw(n)
  if band(n, 0xf) == 0 then waction("ESC") end
  wputxw(n)
end

-- Reserve position for word.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = "" -- Placeholder until wputpos() fills it in.
  return slot
end

-- Store word to reserved position.
local function wputpos(pos, n)
  local in_range = n >= -0x80000000 and n <= 0xffffffff
  assert(in_range and n % 1 == 0, "word out of range")
  actlist[pos] = n
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
local next_global = 20
local function assign_global(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local num = next_global
  if num > 2047 then werror("too many global labels") end
  next_global = num + 1
  t[name] = num
  return num
end
local map_global = setmetatable({}, { __index = assign_global })

-- Invert map_global: global label number -> name.
local function globals_by_number()
  local names = {}
  for name, num in pairs(map_global) do names[num] = name end
  return names
end

-- Dump global labels.
local function dumpglobals(out, lvl)
  local names = globals_by_number()
  out:write("Global labels:\n")
  for num = 20, next_global-1 do
    out:write(format(" %s\n", names[num]))
  end
  out:write("\n")
end

-- Write global label enum.
local function writeglobals(out, prefix)
  local names = globals_by_number()
  out:write("enum {\n")
  for num = 20, next_global-1 do
    out:write(" ", prefix, names[num], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
local function writeglobalnames(out, name)
  local names = globals_by_number()
  out:write("static const char *const ", name, "[] = {\n")
  for num = 20, next_global-1 do
    out:write(" \"", names[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
local next_extern = 0
local map_extern_ = {}
local function assign_extern(t, name)
  -- No restrictions on the name for now.
  local num = next_extern
  if num > 2047 then werror("too many extern labels") end
  next_extern = num + 1
  t[name] = num
  map_extern_[num] = name
  return num
end
local map_extern = setmetatable({}, { __index = assign_extern })

-- Dump extern labels.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  for num = 0, next_extern-1 do
    out:write(format(" %s\n", map_extern_[num]))
  end
  out:write("\n")
end

-- Write extern label names.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  for num = 0, next_extern-1 do
    out:write(" \"", map_extern_[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {
  ra = "x1", sp = "x2",
} -- Ext. register name -> int. name.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers.
local map_revdef = { x1 = "ra", x2 = "sp" }
function _M.revdef(s)
  return map_revdef[s] or s
end

------------------------------------------------------------------------------

-- Template strings for RISC-V instructions.
local map_op = {}

-- Template format: 8 hex digits of base opcode followed by one character
-- per operand. The characters are decoded by map_op[".template__"] below.
local map_op_rv32imafd = {

  -- RV32I
  lui_2 = "00000037DU",
  auipc_2 = "00000017DA",

  jal_2 = "0000006fDJ",
  jalr_3 = "00000067DRJ",
  -- pseudo-instrs
  j_1 = "0000006fJ",
  jal_1 = "000000efJ",
  jr_1 = "00000067R",
  jalr_1 = "000000e7R",
  jalr_2 = "000000e7RJ",

  beq_3 = "00000063RrB",
  bne_3 = "00001063RrB",
  blt_3 = "00004063RrB",
  bge_3 = "00005063RrB",
  bltu_3 = "00006063RrB",
  bgeu_3 = "00007063RrB",
  -- pseudo-instrs
  bnez_2 = "00001063RB",
  beqz_2 = "00000063RB",
  blez_2 = "00005063rB",
  bgez_2 = "00005063RB",
  bltz_2 = "00004063RB",
  bgtz_2 = "00004063rB",
  bgt_3 = "00004063rRB",
  ble_3 = "00005063rRB",
  bgtu_3 = "00006063rRB",
  bleu_3 = "00007063rRB",

  lb_2 = "00000003DL",
  lh_2 = "00001003DL",
  lw_2 = "00002003DL",
  lbu_2 = "00004003DL",
  lhu_2 = "00005003DL",

  sb_2 = "00000023rS",
  sh_2 = "00001023rS",
  sw_2 = "00002023rS",

  addi_3 = "00000013DRI",
  slti_3 = "00002013DRI",
  sltiu_3 = "00003013DRI",
  xori_3 = "00004013DRI",
  ori_3 = "00006013DRI",
  andi_3 = "00007013DRI",
  slli_3 = "00001013DRi",
  srli_3 = "00005013DRi",
  srai_3 = "40005013DRi",
  -- pseudo-instrs
  seqz_2 = "00103013DR",
  ["zext.b_2"] = "0ff07013DR",

  add_3 = "00000033DRr",
  sub_3 = "40000033DRr",
  sll_3 = "00001033DRr",
  slt_3 = "00002033DRr",
  sltu_3 = "00003033DRr",
  xor_3 = "00004033DRr",
  srl_3 = "00005033DRr",
  sra_3 = "40005033DRr",
  or_3 = "00006033DRr",
  and_3 = "00007033DRr",
  -- pseudo-instrs
  snez_2 = "00003033Dr",
  sltz_2 = "00002033DR",
  sgtz_2 = "00002033Dr",

  ecall_0 = "00000073",
  ebreak_0 = "00100073",

  nop_0 = "00000013",
  li_2 = "00000013DI",
  mv_2 = "00000013DR",
  not_2 = "fff04013DR",
  neg_2 = "40000033Dr",
  ret_0 = "00008067",

  -- RV32M
  mul_3 = "02000033DRr",
  mulh_3 = "02001033DRr",
  mulhsu_3 = "02002033DRr",
  mulhu_3 = "02003033DRr",
  div_3 = "02004033DRr",
  divu_3 = "02005033DRr",
  rem_3 = "02006033DRr",
  remu_3 = "02007033DRr",

  -- RV32A
  -- AMO major opcode 0x2f, funct3 = 2 (.w), funct5 in bits 31..27, aq/rl
  -- clear. Operands are GPRs in assembler order rd, rs2, rs1; the address
  -- register rs1 is written bare, because parse_gpr does not accept "(reg)".
  -- NOTE(review): the previous templates reused the FP-convert opcode 0x53
  -- with an FPR destination and could not encode any atomic instruction.
  ["lr.w_2"] = "1000202fDR",
  ["sc.w_3"] = "1800202fDrR",
  ["amoswap.w_3"] = "0800202fDrR",
  ["amoadd.w_3"] = "0000202fDrR",
  ["amoxor.w_3"] = "2000202fDrR",
  ["amoor.w_3"] = "4000202fDrR",
  ["amoand.w_3"] = "6000202fDrR",
  ["amomin.w_3"] = "8000202fDrR",
  ["amomax.w_3"] = "a000202fDrR",
  ["amominu.w_3"] = "c000202fDrR",
  ["amomaxu.w_3"] = "e000202fDrR",

  -- RV32F
  ["flw_2"] = "00002007FL",
  ["fsw_2"] = "00002027gS",

  ["fmadd.s_4"] = "00000043FGgH",
  ["fmsub.s_4"] = "00000047FGgH",
  ["fnmsub.s_4"] = "0000004bFGgH",
  ["fnmadd.s_4"] = "0000004fFGgH",
  ["fmadd.s_5"] = "00000043FGgHM",
  ["fmsub.s_5"] = "00000047FGgHM",
  ["fnmsub.s_5"] = "0000004bFGgHM",
  ["fnmadd.s_5"] = "0000004fFGgHM",

  ["fadd.s_3"] = "00000053FGg",
  ["fsub.s_3"] = "08000053FGg",
  ["fmul.s_3"] = "10000053FGg",
  ["fdiv.s_3"] = "18000053FGg",
  ["fsqrt.s_2"] = "58000053FG",
  ["fadd.s_4"] = "00000053FGgM",
  ["fsub.s_4"] = "08000053FGgM",
  ["fmul.s_4"] = "10000053FGgM",
  ["fdiv.s_4"] = "18000053FGgM",
  ["fsqrt.s_3"] = "58000053FGM",

  ["fsgnj.s_3"] = "20000053FGg",
  ["fsgnjn.s_3"] = "20001053FGg",
  ["fsgnjx.s_3"] = "20002053FGg",

  ["fmin.s_3"] = "28000053FGg",
  ["fmax.s_3"] = "28001053FGg",

  ["fcvt.w.s_2"] = "c0000053DG",
  ["fcvt.wu.s_2"] = "c0100053DG",
  ["fcvt.w.s_3"] = "c0000053DGM",
  ["fcvt.wu.s_3"] = "c0100053DGM",
  ["fmv.x.w_2"] = "e0000053DG",

  ["feq.s_3"] = "a0002053DGg",
  ["flt.s_3"] = "a0001053DGg",
  ["fle.s_3"] = "a0000053DGg",

  ["fclass.s_2"] = "e0001053DG",

  ["fcvt.s.w_2"] = "d0000053FR",
  ["fcvt.s.wu_2"] = "d0100053FR",
  ["fcvt.s.w_3"] = "d0000053FRM",
  ["fcvt.s.wu_3"] = "d0100053FRM",
  ["fmv.w.x_2"] = "f0000053FR",

  -- RV32D
  ["fld_2"] = "00003007FL",
  ["fsd_2"] = "00003027gS",

  ["fmadd.d_4"] = "02000043FGgH",
  ["fmsub.d_4"] = "02000047FGgH",
  ["fnmsub.d_4"] = "0200004bFGgH",
  ["fnmadd.d_4"] = "0200004fFGgH",
  ["fmadd.d_5"] = "02000043FGgHM",
  ["fmsub.d_5"] = "02000047FGgHM",
  ["fnmsub.d_5"] = "0200004bFGgHM",
  ["fnmadd.d_5"] = "0200004fFGgHM",

  ["fadd.d_3"] = "02000053FGg",
  ["fsub.d_3"] = "0a000053FGg",
  ["fmul.d_3"] = "12000053FGg",
  ["fdiv.d_3"] = "1a000053FGg",
  ["fsqrt.d_2"] = "5a000053FG",
  ["fadd.d_4"] = "02000053FGgM",
  ["fsub.d_4"] = "0a000053FGgM",
  ["fmul.d_4"] = "12000053FGgM",
  ["fdiv.d_4"] = "1a000053FGgM",
  ["fsqrt.d_3"] = "5a000053FGM",

  ["fsgnj.d_3"] = "22000053FGg",
  ["fsgnjn.d_3"] = "22001053FGg",
  ["fsgnjx.d_3"] = "22002053FGg",
  ["fmin.d_3"] = "2a000053FGg",
  ["fmax.d_3"] = "2a001053FGg",
  ["fcvt.s.d_2"] = "40100053FG",
  ["fcvt.d.s_2"] = "42000053FG",
  ["feq.d_3"] = "a2002053DGg",
  ["flt.d_3"] = "a2001053DGg",
  ["fle.d_3"] = "a2000053DGg",
  ["fclass.d_2"] = "e2001053DG",
  ["fcvt.w.d_2"] = "c2000053DG",
  ["fcvt.wu.d_2"] = "c2100053DG",
  ["fcvt.d.w_2"] = "d2000053FR",
  ["fcvt.d.wu_2"] = "d2100053FR",
  ["fcvt.w.d_3"] = "c2000053DGM",
  ["fcvt.wu.d_3"] = "c2100053DGM",
  ["fcvt.d.w_3"] = "d2000053FRM",
  ["fcvt.d.wu_3"] = "d2100053FRM",

  ["fmv.d_2"] = "22000053FY",
  ["fneg.d_2"] = "22001053FY",
  ["fabs.d_2"] = "22002053FY",

}

local map_op_rv64imafd = {

  -- RV64I
  lwu_2 = "00006003DL",
  ld_2 = "00003003DL",

  sd_2 = "00003023rS",

  slli_3 = "00001013DRj",
  srli_3 = "00005013DRj",
  srai_3 = "40005013DRj",

  addiw_3 = "0000001bDRI",
  slliw_3 = "0000101bDRi",
  srliw_3 = "0000501bDRi",
  sraiw_3 = "4000501bDRi",

  addw_3 = "0000003bDRr",
  subw_3 = "4000003bDRr",
  sllw_3 = "0000103bDRr",
  srlw_3 = "0000503bDRr",
  sraw_3 = "4000503bDRr",

  negw_2 = "4000003bDr",
  ["sext.w_2"] = "0000001bDR",

  -- RV64M
  mulw_3 = "0200003bDRr",
  divw_3 = "0200403bDRr",
  divuw_3 = "0200503bDRr",
  remw_3 = "0200603bDRr",
  remuw_3 = "0200703bDRr",

  -- RV64A
  -- Same layout as RV32A with funct3 = 3 (.d); operands rd, rs2, rs1.
  ["lr.d_2"] = "1000302fDR",
  ["sc.d_3"] = "1800302fDrR",
  ["amoswap.d_3"] = "0800302fDrR",
  ["amoadd.d_3"] = "0000302fDrR",
  ["amoxor.d_3"] = "2000302fDrR",
  ["amoor.d_3"] = "4000302fDrR",
  ["amoand.d_3"] = "6000302fDrR",
  ["amomin.d_3"] = "8000302fDrR",
  ["amomax.d_3"] = "a000302fDrR",
  ["amominu.d_3"] = "c000302fDrR",
  ["amomaxu.d_3"] = "e000302fDrR",

  -- RV64F
  ["fcvt.l.s_2"] = "c0200053DG",
  ["fcvt.lu.s_2"] = "c0300053DG",
  ["fcvt.l.s_3"] = "c0200053DGM",
  ["fcvt.lu.s_3"] = "c0300053DGM",
  ["fcvt.s.l_2"] = "d0200053FR",
  ["fcvt.s.lu_2"] = "d0300053FR",
  ["fcvt.s.l_3"] = "d0200053FRM",
  ["fcvt.s.lu_3"] = "d0300053FRM",

  -- RV64D
  ["fcvt.l.d_2"] = "c2200053DG",
  ["fcvt.lu.d_2"] = "c2300053DG",
  ["fcvt.l.d_3"] = "c2200053DGM",
  ["fcvt.lu.d_3"] = "c2300053DGM",
  ["fmv.x.d_2"] = "e2000053DG",
  ["fcvt.d.l_2"] = "d2200053FR",
  ["fcvt.d.lu_2"] = "d2300053FR",
  ["fcvt.d.l_3"] = "d2200053FRM",
  ["fcvt.d.lu_3"] = "d2300053FRM",
  ["fmv.d.x_2"] = "f2000053FR",

}

local map_op_zicsr = {
  csrrw_3 = "00001073DCR",
  csrrs_3 = "00002073DCR",
  csrrc_3 = "00003073DCR",
  csrrwi_3 = "00005073DCu",
  csrrsi_3 = "00006073DCu",
  csrrci_3 = "00007073DCu",

  -- pseudo-ops
  csrrw_2 = "00001073DC",
  csrrs_2 = "00002073CR",
  csrrc_2 = "00003073CR",
  csrrwi_2 = "00005073Cu",
  csrrsi_2 = "00006073Cu",
  csrrci_2 = "00007073Cu",

  rdinstret_1 = "C0202073D",
  rdcycle_1 = "C0002073D",
  rdtime_1 = "C0102073D",
  rdinstreth_1 = "C8202073D",
  rdcycleh_1 = "C8002073D",
  rdtimeh_1 = "C8102073D",

  frcsr_1 = "00302073D",
  fscsr_2 = "00301073DR",
  fscsr_1 = "00301073R",
  frrm_1 = "00202073D",
  fsrm_2 = "00201073DR",
  fsrm_1 = "00201073R",
  fsrmi_2 = "00205073Du",
  fsrmi_1 = "00205073u",
  frflags_1 = "00102073D",
  fsflags_2 = "00101073DR",
  fsflagsi_2 = "00105073Du",
  fsflagsi_1 = "00105073u",
}

local map_op_zifencei = {
  ["fence.i_3"] = "0000100fDRI",
}

-- Tables are merged in sorted key order ('a', 'b', ...), so a later table
-- overrides entries of an earlier one (e.g. the RV64 shift templates).
local list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr }
local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr }

if riscv32 then for _, map in opairs(list_map_op_rv32) do
    for k, v in pairs(map) do map_op[k] = v end
  end
end
if riscv64 then for _, map in opairs(list_map_op_rv64) do
    for k, v in pairs(map) do map_op[k] = v end
  end
end

------------------------------------------------------------------------------

-- Parse a general purpose register "x0".."x31", a type name with a default
-- register, or "type:xN". Returns the register number and the type (if any).
local function parse_gpr(expr)
  local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local r = match(expr, "^x([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r, tp end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a floating point register "f0".."f31" and return its number.
local function parse_fpr(expr)
  local r = match(expr, "^f([1-3]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 31 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse an immediate of `bits` width, pre-scaled by 2^scale and placed at
-- bit `shift`. Numeric literals are range-checked and encoded right away;
-- any other expression emits an IMM action (or `action`) and contributes 0.
local function parse_imm(imm, bits, shift, scale, signed, action)
  local n = tonumber(imm)
  if n then
    local m = sar(n, scale)
    if shl(m, scale) == n then
      if signed then
        local s = sar(m, bits-1)
        if s == 0 then return shl(m, shift)
        elseif s == -1 then return shl(m + shl(1, bits), shift) end
      else
        if sar(m, bits) == 0 then return shl(m, shift) end
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
         match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
    werror("expected immediate operand, got register")
  else
    waction(action or "IMM",
            (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
    return 0
  end
end

-- Parse a numeric CSR address (0..4095).
local function parse_csr(expr)
  local r = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 4095 then return r end
  end
  werror("bad register name `"..expr.."'")
end

-- Parse a signed 12 bit store offset and return it split into the S-type
-- fields (imm[4:0] at bit 7, imm[11:5] at bit 25). Non-literal expressions
-- emit an IMMS action and contribute 0.
local function parse_imms(imm)
  local n = tonumber(imm)
  if n then
    if n >= -2048 and n < 2048 then
      local imm5, imm7 = band(n, 0x1f), shr(band(n, 0xfe0), 5)
      return shl(imm5, 7) + shl(imm7, 25)
    end
    werror("out of range immediate `"..imm.."'")
  elseif match(imm, "^[xf]([1-3]?[0-9])$") or
         match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then
    werror("expected immediate operand, got register")
  else
    waction("IMMS", 0, imm); return 0
  end
end

-- Parse a rounding mode name and return its 3 bit encoding.
local function parse_rm(mode)
  local rnd_mode = {
    rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7
  }
  local n = rnd_mode[mode]
  if n then return n
  else werror("bad rounding mode `"..mode.."'") end
end

-- Parse a memory operand: "imm(reg)", "extern name(reg)" or a typed
-- register followed by a field expression. `mode` is "load" (I-type
-- offset) or "store" (S-type offset). Returns the bits to add to the opcode.
local function parse_disp(disp, mode)
  local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$")
  if imm then
    local r = shl(parse_gpr(reg), 15)
    local extname = match(imm, "^extern%s+(%S+)$")
    if extname then
      waction("REL_EXT", map_extern[extname], nil, 1)
      return r
    else
      if mode == "load" then
        return r + parse_imm(imm, 12, 20, 0, true)
      elseif mode == "store" then
        return r + parse_imms(imm)
      else
        werror("bad displacement mode '"..mode.."'")
      end
    end
  end
  local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$")
  if reg and tailr ~= "" then
    local r, tp = parse_gpr(reg)
    if tp then
      if mode == "load" then
        waction("IMM", 32768+12*32+20, format(tp.ctypefmt, tailr))
      elseif mode == "store" then
        waction("IMMS", 0, format(tp.ctypefmt, tailr))
      else
        werror("bad displacement mode '"..mode.."'")
      end
      return shl(r, 15)
    end
  end
  werror("bad displacement `"..disp.."'")
end

-- Parse a label. Returns the action suffix ("PC", "LG" or "EXT"), the label
-- number and, for PC labels, the C expression. `def` selects definition
-- syntax rather than reference syntax for local labels.
local function parse_label(label, def)
  local prefix = sub(label, 1, 2)
  -- =>label (pc label reference)
  if prefix == "=>" then
    return "PC", 0, sub(label, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "LG", map_global[sub(label, 3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      return "LG", lnum + (dir == ">" and 0 or 10)
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
  end
  werror("bad label `"..label.."'")
end

------------------------------------------------------------------------------

-- Handle opcodes defined with template strings.
map_op[".template__"] = function(params, template, nparams)
  if not params then return sub(template, 9) end
  local op = tonumber(sub(template, 1, 8), 16)
  local n = 1

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 2 positions (ins/ext).
  if secpos+2 > maxsecpos then wflush() end
  local pos = wpos()

  -- Process each character.
  for p in gmatch(sub(template, 9), ".") do
    if p == "D" then -- gpr rd
      op = op + shl(parse_gpr(params[n]), 7); n = n + 1
    elseif p == "R" then -- gpr rs1
      op = op + shl(parse_gpr(params[n]), 15); n = n + 1
    elseif p == "r" then -- gpr rs2
      op = op + shl(parse_gpr(params[n]), 20); n = n + 1
    elseif p == "F" then -- fpr rd
      op = op + shl(parse_fpr(params[n]), 7); n = n + 1
    elseif p == "G" then -- fpr rs1
      op = op + shl(parse_fpr(params[n]), 15); n = n + 1
    elseif p == "g" then -- fpr rs2
      op = op + shl(parse_fpr(params[n]), 20); n = n + 1
    elseif p == "H" then -- fpr rs3
      op = op + shl(parse_fpr(params[n]), 27); n = n + 1
    elseif p == "C" then -- csr
      op = op + shl(parse_csr(params[n]), 20); n = n + 1
    elseif p == "M" then -- fpr rounding mode
      op = op + shl(parse_rm(params[n]), 12); n = n + 1
    elseif p == "Y" then -- fpr pseudo-op
      local r = parse_fpr(params[n])
      op = op + shl(r, 15) + shl(r, 20); n = n + 1
    elseif p == "I" then -- I-type imm12
      op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1
    elseif p == "i" then -- I-type shamt5
      op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1
    elseif p == "j" then -- I-type shamt6
      op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1
    elseif p == "u" then -- I-type uimm
      op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1
    elseif p == "U" then -- U-type imm20
      op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1
    elseif p == "L" then -- load
      op = op + parse_disp(params[n], "load"); n = n + 1
    elseif p == "S" then -- store
      op = op + parse_disp(params[n], "store"); n = n + 1
    elseif p == "B" or p == "J" then -- control flow
      local mode, m, s = parse_label(params[n], false)
      if p == "B" then m = m + 2048 end -- Bit 11 selects the B-type patch.
      waction("REL_"..mode, m, s, 1); n = n + 1
    elseif p == "A" then -- AUIPC
      local mode, m, s = parse_label(params[n], false)
      waction("REL_"..mode, m, s, 1); n = n + 1
    else
      assert(false)
    end
  end
  wputpos(pos, op)
end

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. 
+ map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. 
+function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua new file mode 100644 index 000000000..f194ce1dc --- /dev/null +++ b/dynasm/dasm_riscv32.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 32 module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 32 bit mode for the combined RISC-V module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +riscv32 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua new file mode 100644 index 000000000..25274395d --- /dev/null +++ b/dynasm/dasm_riscv64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 64 module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined RISC-V module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +riscv64 = true -- Using a global is an ugly, but effective solution. 
+return require("dasm_riscv") diff --git a/src/Makefile b/src/Makefile index 44a739530..6471afeac 100644 --- a/src/Makefile +++ b/src/Makefile @@ -52,6 +52,7 @@ CCOPT_arm= CCOPT_arm64= CCOPT_ppc= CCOPT_mips= +CCOPT_riscv64= # #CCDEBUG= # Uncomment the next line to generate debug information: @@ -269,6 +270,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) else TARGET_LJARCH= mips endif +else +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv64 else $(error Unsupported target architecture) endif @@ -278,6 +282,7 @@ endif endif endif endif +endif ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 @@ -484,6 +489,9 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D ELFV2 endif endif +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D RISCV64 +endif endif endif diff --git a/src/host/buildvm.c b/src/host/buildvm.c index d460b3144..ff4e01e11 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -69,6 +69,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #include "../dynasm/dasm_mips.h" #elif LJ_TARGET_S390X #include "../dynasm/dasm_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "../dynasm/dasm_riscv.h" #else #error "No support for this architecture (yet)" #endif diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index c0b0594c8..c8d609dd2 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -208,6 +208,34 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); exit(1); +#elif LJ_TARGET_RISCV64 + if ((ins & 0x7f) == 0x17u) { + fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); + } else if ((ins & 0x7f) == 0x67u) { + fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); + } else if ((ins & 0x7f) == 0x6fu) { + fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); + } else if ((ins & 0x7f) == 
0x03u) { + uint8_t funct3 = (ins >> 12) & 7; + uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31; + switch (funct3) { + case 0: fprintf(ctx->fp, "\tlb"); break; + case 1: fprintf(ctx->fp, "\tlh"); break; + case 2: fprintf(ctx->fp, "\tlw"); break; + case 3: fprintf(ctx->fp, "\tld"); break; + case 4: fprintf(ctx->fp, "\tlbu"); break; + case 5: fprintf(ctx->fp, "\tlhu"); break; + case 6: fprintf(ctx->fp, "\tlwu"); break; + default: goto rv_reloc_err; + } + fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1); + } else { +rv_reloc_err: + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); + } #else #error "missing relocation support for this architecture" #endif @@ -303,6 +331,9 @@ void emit_asm(BuildCtx *ctx) #endif #if LJ_TARGET_MIPS fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); +#endif +#if LJ_TARGET_RISCV64 + fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); #endif emit_asm_align(ctx, 4); diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index f0ca5514b..7caea74a1 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -103,6 +103,7 @@ local map_arch = { mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, s390x = { e = "be", b = 64, m = 22, }, + riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, } local map_os = { diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua new file mode 100644 index 000000000..8de563a72 --- /dev/null +++ b/src/jit/dis_riscv.lua @@ -0,0 +1,979 @@ +------------------------------------------------------------------------------ +-- LuaJIT RISC-V disassembler module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Milos Poletanovic from Syrmia.com. +-- Contributed by gns from PLCT Lab, ISCAS. 
------------------------------------------------------------------------------
-- This is a helper module used by the LuaJIT machine code dumper module.
--
-- It disassembles most standard RISC-V instructions.
-- Mode is little-endian
------------------------------------------------------------------------------

local type = type
local byte, format = string.byte, string.format
local match, gmatch = string.match, string.gmatch
local concat = table.concat
local bit = require("bit")
local band, bor, tohex = bit.band, bit.bor, bit.tohex
local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
local jit = require("jit")

-- Values returned by jit.status(), captured once at load time.
local jstat = { jit.status() }

-- Return true if `opt` is one of the values reported by jit.status().
local function is_opt_enabled(opt)
  for _, v in ipairs(jstat) do
    if v == opt then
      return true
    end
  end
  return false
end
local xthead = is_opt_enabled("XThead")

------------------------------------------------------------------------------
-- Opcode maps
------------------------------------------------------------------------------
-- A map is either a pattern string (mnemonic followed by operand letters) or
-- a table that selects a sub-entry by the instruction field described by its
-- `shift` and `mask`; `_` is the fallback entry of a table.
--
-- Beware: positional entries in a Lua table constructor are always numbered
-- 1, 2, 3, ... no matter which explicit keys precede them.  Positional
-- entries are therefore only used directly after `[0] = ...`; every entry
-- that follows a gap carries an explicit key.

--RVC32 extension

local map_quad0 = {
  shift = 13, mask = 7,
  [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn",
  false, "c.fsdNMh", "c.swZMn", "c.fswNMn"
}

local map_sub2quad1 = {
  shift = 5, mask = 3,
  [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ"
}

local map_sub1quad1 = {
  shift = 10, mask = 3,
  [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1
}

local map_quad1 = {
  shift = 13, mask = 7,
  [0] = {
    shift = 7, mask = 31,
    [0] = "c.nop", _ = "c.addiDx"
  },
  [1] = "c.jalT", [2] = "c.liDx",
  [3] = {
    shift = 7, mask = 31,
    [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX",
    _ = "c.luiDK"
  },
  [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq"
}

local map_sub1quad2 = {
  shift = 12, mask = 1,
  [0] = {
    shift = 2, mask = 31,
    [0] = "c.jrD", _ = "c.mvDE"
  },
  [1] = {
    shift = 2, mask = 31,
    [0] = {
      shift = 7, mask = 31,
      [0] = "c.ebreak", _ = "c.jalrD"
    },
    _ = "c.addDE"
  }
}

local map_quad2 = {
  shift = 13, mask = 7,
  [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY",
  [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu"
}

local map_compr = {
  [0] = map_quad0, map_quad1, map_quad2
}

--RV32M
local map_mext = {
  shift = 12, mask = 7,
  [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr",
  "divDRr", "divuDRr", "remDRr", "remuDRr"
}

--RV64M
local map_mext64 = {
  shift = 12, mask = 7,
  [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr",
  [7] = "remuwDRr"
}

--RV32F, RV64F, RV32D, RV64D
local map_fload = {
  shift = 12, mask = 7,
  [2] = "flwFL", [3] = "fldFL"
}

local map_fstore = {
  shift = 12, mask = 7,
  [2] = "fswSg", [3] = "fsdSg"
}

local map_fmadd = {
  shift = 25, mask = 3,
  [0] = "fmadd.sFGgHo", "fmadd.dFGgHo"
}

local map_fmsub = {
  shift = 25, mask = 3,
  [0] = "fmsub.sFGgHo", "fmsub.dFGgHo"
}

local map_fnmsub = {
  shift = 25, mask = 3,
  [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo"
}

local map_fnmadd = {
  shift = 25, mask = 3,
  [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo"
}

local map_fsgnjs = {
  shift = 12, mask = 7,
  [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6"
}

local map_fsgnjd = {
  shift = 12, mask = 7,
  [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6"
}

local map_fms = {
  shift = 12, mask = 7,
  [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg"
}

local map_fmd = {
  shift = 12, mask = 7,
  [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg"
}

-- Comparisons are selected by funct3.  fltq needs the explicit key 5: as a
-- positional entry after `[4] = ...` it would be stored at index 3.
local map_fcomps = {
  shift = 12, mask = 7,
  [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg",
  [4] = "fleq.sDGg", [5] = "fltq.sDGg"
}

local map_fcompd = {
  shift = 12, mask = 7,
  [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg",
  [4] = "fleq.dDGg", [5] = "fltq.dDGg"
}

local map_fcvtwls = {
  shift = 20, mask = 31,
  [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo"
}

local map_fcvtwld = {
  shift = 20, mask = 31,
  [0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo",
  [8] = {
    shift = 12, mask = 7,
    [1] = "fcvtmodw.dDG"
  }
}

local map_fcvts = {
  shift = 20, mask = 31,
  [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo"
}

local map_fcvtd = {
  shift = 20, mask = 31,
  [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo"
}

-- Selected by the rs2 field, which names the source format of a conversion:
-- fcvt.s.d converts from double, so its rs2 field is 1 (fcvt.d.s uses 0).
local map_fcvtsd = {
  shift = 20, mask = 31,
  [1] = "fcvt.s.dFGo",
  [4] = "fround.sFGo", [5] = "froundnx.sFGo"
}

local map_fcvtds = {
  shift = 20, mask = 31,
  [0] = "fcvt.d.sFGo",
  [4] = "fround.dFGo", [5] = "froundnx.dFGo"
}

local map_fmvwx = {
  shift = 20, mask = 31,
  [0] = "fmv.w.xFR", [1] = "fli.sFy"
}

local map_fmvdx = {
  shift = 20, mask = 31,
  [0] = "fmv.d.xFR", [1] = "fli.dFy"
}

local map_fext = {
  shift = 25, mask = 127,
  [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo",
  [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo",
  [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd,
  [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo",
  [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld,
  [104] = map_fcvts, [105] = map_fcvtd,
  [112] = {
    shift = 12, mask = 7,
    [0] = "fmv.x.wDG", "fclass.sDG"
  },
  [113] = {
    shift = 12, mask = 7,
    [0] = "fmv.x.dDG", "fclass.dDG"
  },
  [120] = map_fmvwx, [121] = map_fmvdx
}

--RV32A, RV64A
local map_aext = {
  shift = 27, mask = 31,
  [0] = {
    shift = 12, mask = 7,
    [2] = "amoadd.wDrO", [3] = "amoadd.dDrO"
  },
  {
    shift = 12, mask = 7,
    [2] = "amoswap.wDrO", [3] = "amoswap.dDrO"
  },
  {
    shift = 12, mask = 7,
    [2] = "lr.wDO", [3] = "lr.dDO"
  },
  {
    shift = 12, mask = 7,
    [2] = "sc.wDrO", [3] = "sc.dDrO"
  },
  {
    shift = 12, mask = 7,
    [2] = "amoxor.wDrO", [3] = "amoxor.dDrO"
  },
  [8] = {
    shift = 12, mask = 7,
    [2] = "amoor.wDrO", [3] = "amoor.dDrO"
  },
  [12] = {
    shift = 12, mask = 7,
    [2] = "amoand.wDrO", [3] = "amoand.dDrO"
  },
  [16] = {
    shift = 12, mask = 7,
    [2] = "amomin.wDrO", [3] = "amomin.dDrO"
  },
  [20] = {
    shift = 12, mask = 7,
    [2] = "amomax.wDrO", [3] = "amomax.dDrO"
  },
  [24] = {
    shift = 12, mask = 7,
    [2] = "amominu.wDrO", [3] = "amominu.dDrO"
  },
  [28] = {
    shift = 12, mask = 7,
    [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO"
  },
}

-- RV32I, RV64I
local map_load = {
  shift = 12, mask = 7,
  [0] = "lbDL", "lhDL", "lwDL", "ldDL",
  "lbuDL", "lhuDL", "lwuDL"
}

local map_opimm = {
  shift = 12, mask = 7,
  [0] = {
    shift = 7, mask = 0x1ffffff,
    [0] = "nop", _ = "addi|li|mvDR0I2"
  },
  {
    shift = 25, mask = 127,
    [48] = {
      shift = 20, mask = 31,
      [4] = "sext.bDR", [5] = "sext.hDR"
    },
    _ = "slliDRi",
  }, "sltiDRI", "sltiu|seqzDRI5",
  "xori|notDRI4",
  {
    shift = 26, mask = 63,
    [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi",
    [26] = {
      shift = 20, mask = 63,
      [56] = "rev8DR"
    }
  },
  "oriDRI", "andiDRI"
}

local map_branch = {
  shift = 12, mask = 7,
  [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false,
  "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB"
}

local map_store = {
  shift = 12, mask = 7,
  [0] = "sbSr", "shSr", "swSr", "sdSr"
}

local map_op = {
  shift = 25, mask = 127,
  [0] = {
    shift = 12, mask = 7,
    [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r",
    "xorDRr", "srlDRr", "orDRr", "andDRr"
  },
  [1] = map_mext,
  -- NOTE(review): this entry has neither shift/mask nor patterns.  Confirm
  -- that the decode loop copes with a table lacking `shift`, or drop it.
  [4] = {

  },
  [5] = { -- Zbb
    shift = 12, mask = 7,
    [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr"
  },
  [7] = { -- Zicond
    shift = 12, mask = 7,
    [5] = "czero.eqzDRr", [7] = "czero.nezDRr"
  },
  [16] = { -- Zba
    shift = 12, mask = 7,
    [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr"
  },
  [32] = { -- Zbb
    shift = 12, mask = 7,
    [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr"
  },
  [48] = { -- Zbb
    shift = 12, mask = 7,
    [1] = "rolDRr", [5] = "rorDRr"
  }
}

--- 64I
-- Selected by funct3 (0..7).  slli.uw (Zba) shares funct3 = 1 with slliw and
-- is told apart by bits 31:26 being 000010; every other value is slliw.
local map_opimm32 = {
  shift = 12, mask = 7,
  [0] = "addiw|sext.wDRI0",
  [1] = { -- 64I, Zba
    shift = 26, mask = 63,
    [2] = "slli.uwDRi", _ = "slliwDRi"
  },
  [5] = { -- 64I, Zbb
    shift = 25, mask = 127,
    [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi"
  }
}

local map_op32 = {
  shift = 25, mask = 127,
  [0] = { -- 64I
    shift = 12, mask = 7,
    [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr"
  },
  [1] = map_mext64,
  [4] = { -- Zba & Zbb
    shift = 12, mask = 7,
    [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr"
  },
  [16] = { -- Zba; same operand layout as sh1add/sh2add/sh3add in map_op.
    shift = 12, mask = 7,
    [2] = "sh1add.uwDRr", [4] = "sh2add.uwDRr", [6] = "sh3add.uwDRr"
  },
  [32] = { -- 64I
    shift = 12, mask = 7,
    [0] = "subw|negwDR0r", [5] = "srawDRr"
  },
  [48] = { -- Zbb
    shift = 12, mask = 7,
    [1] = "rolwDRr", [5] = "rorwDRr"
  }
}

local map_ecabre = {
  shift = 12, mask = 7,
  [0] = {
    shift = 20, mask = 4095,
    [0] = "ecall", "ebreak"
  }
}

local map_fence = {
  shift = 12, mask = 1,
  [0] = "fence", --"fence.i" ZIFENCEI EXTENSION
}

local map_jalr = {
  shift = 7, mask = 0x1ffffff,
  _ = "jalr|jrDRI7", [256] = "ret"
}

-- T-Head vendor extensions in the custom-0 opcode space, selected by funct3.
-- All entries carry explicit keys: positional entries would be numbered
-- from 1 and collide with the [1] and [2] entries.
local map_xthead_custom0 = {
  shift = 12, mask = 7,
  [1] = { -- Arithmetic
    shift = 27, mask = 31,
    [0] = "th.addslDRrv",
    [2] = {
      shift = 26, mask = 63,
      [4] = "th.srriDRi",
      [5] = {
        shift = 25, mask = 127,
        [10] = "th.srriwDRi"
      }
    },
    [4] = { -- XTheadMac
      shift = 25, mask = 3,
      [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr"
    },
    [5] = { -- XTheadMac
      shift = 25, mask = 3,
      [0] = "th.mulahDRr", "th.mulshDRr"
    },
    [8] = { -- XTheadCondMov
      shift = 25, mask = 3,
      [0] = "th.mveqzDRr", "th.mvnezDRr"
    },
    [16] = { -- XTheadBb
      shift = 20, mask = 31,
      [0] = {
        shift = 25, mask = 3,
        [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR"
      }
    },
    [17] = { -- XTheadBb
      shift = 26, mask = 1,
      [0] = "th.tstDRi"
    },
    [18] = { -- XTheadBb
      shift = 20, mask = 31,
      [0] = {
        shift = 25, mask = 3,
        [0] = "th.revwDR"
      }
    }
  },
  [2] = "th.extDRji", [3] = "th.extuDRji",
  [4] = { -- MemLoad
    shift = 29, mask = 7,
    [7] = { -- XTheadMemPair
      -- Bits 28:27 pick the access width; bits 26:25 hold the imm2 operand.
      shift = 27, mask = 3,
      [0] = "th.lwdDrP", [2] = "th.lwudDrP", [3] = "th.lddDrP"
    }
  },
  [5] = { -- MemStore
    shift = 29, mask = 7,
    [7] = { -- XTheadMemPair
      -- Bits 28:27 pick the access width; bits 26:25 hold the imm2 operand.
      shift = 27, mask = 3,
      [0] = "th.swdDrP", [3] = "th.sddDrP"
    }
  }
}

local map_custom0 = xthead and map_xthead_custom0 or nil

local map_pri = {
  [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm,
  [23] = "auipcDA", [27] = map_opimm32,
  [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op,
  [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub,
  [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext,
  [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre
}

------------------------------------------------------------------------------

local map_gpr = {
  [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7",
  "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31",
}

local map_fgpr = {
  [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",
  "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",
  "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
  "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
}

local map_rm = {
  [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn"
}

local map_fli = {
  [0] = "-1.0",
  "min",
  "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7",
  "0.0625", "0.125",
  "0.25", "0.3125", "0.375", "0.4375",
  "0.5", "0.625", "0.75", "0.875",
  "1.0", "1.25", "1.5", "1.75",
  "2.0", "2.5", "3.0",
  "4.0", "8.0", "16.0", "128.0", "256.0",
  "32768.0", "65536.0", "inf", "nan"
}

+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+-- Writes one line through ctx.out and then advances ctx.pos past the
+-- instruction that was printed (2 bytes if the low two bits of its first
+-- byte are not 11, 4 bytes otherwise).
+local function putop(ctx, text, operands)
+  local pos = ctx.pos
+  local extra = ""
+  if ctx.rel then
+    local sym = ctx.symtab[ctx.rel]
+    if sym then extra = "\t->"..sym end
+  end
+  local line
+  if ctx.hexdump > 0 then
+    line = format("%08x %s %-7s %s%s\n",
+      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra)
+  else
+    line = format("%08x %-7s %s%s\n",
+      ctx.addr+pos, text, concat(operands, ", "), extra)
+  end
+  -- create() defaults ctx.out to io.write, which is a plain function and
+  -- cannot be indexed with :write. Call it directly; objects that provide a
+  -- write method (e.g. file handles) are still accepted.
+  local out = ctx.out
+  if type(out) ~= "function" and out.write then
+    out:write(line)
+  else
+    out(line)
+  end
+  -- Step over the instruction just printed.
+  local first_byte = byte(ctx.code, pos+1)
+  if band(first_byte, 3) < 3 then
+    ctx.pos = pos + 2
+  else
+    ctx.pos = pos + 4
+  end
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+  return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+-- Fetch the little-endian instruction at ctx.pos without advancing it.
+-- Returns a 16 bit value when the low two bits of the first byte are not 11,
+-- otherwise a 32 bit value.
+local function get_le(ctx)
+  local pos = ctx.pos
+  --Examine if the next instruction is 16-bits or 32-bits
+  local first_byte = byte(ctx.code, pos+1)
+  if(band(first_byte, 3) < 3) then --checking first two bits of opcode
+    local b0, b1 = byte(ctx.code, pos+1, pos+2)
+    return bor(lshift(b1, 8), b0)
+  else
+    local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+    return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+  end
+end
+
+-- Unsigned immediate: op[10:7] -> imm[9:6], op[12:11] -> imm[5:4],
+-- op[5] -> imm[3], op[6] -> imm[2].
+local function parse_W(opcode)
+  local part1 = band(rshift(opcode, 7), 15) --9:6
+  local part2 = band(rshift(opcode, 11), 3) --5:4
+  local part3 = band(rshift(opcode, 5), 1)--3
+  local part4 = band(rshift(opcode, 6), 1)--2
+  return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4),
+    lshift(part3, 3), lshift(part4, 2))
+end
+
+-- Signed 6 bit immediate: op[12] -> imm[5] (sign), op[6:2] -> imm[4:0].
+local function parse_x(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --5
+  local part2 = band(rshift(opcode, 2), 31) --4:0
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2)
+  else
+    return bor(lshift(0, 31), lshift(part1, 5), part2)
+  end
+end
+
+-- Signed immediate scaled by 16: op[12] is the sign, op[4:3] -> imm[8:7],
+-- op[5] -> imm[6], op[2] -> imm[5], op[6] -> imm[4].
+local function parse_X(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --12
+  local part2 = band(rshift(opcode, 3), 3) --8:7
+  local part3 = band(rshift(opcode, 5), 1) --6
+  local part4 = band(rshift(opcode, 2), 1) --5
+  local part5 = band(rshift(opcode, 6), 1) --4
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7),
+      lshift(part3, 6), lshift(part4, 5), lshift(part5, 4))
+  else
+    return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6),
+      lshift(part4, 5), lshift(part5, 4))
+  end
+end
+
+-- Signed 12 bit store offset: op[31:25] -> imm[11:5], op[11:7] -> imm[4:0].
+local function parse_S(opcode)
+  local part1 = band(rshift(opcode, 25), 127) --11:5
+  local sign = band(rshift(part1, 6), 1)
+  local part2 = band(rshift(opcode, 7), 31) --4:0
+  if (sign == 1) then
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 5), part2)
+  else
+    return bor(lshift(0, 31), lshift(part1, 5), part2)
+  end
+end
+
+-- Signed branch offset (B-type): op[31] -> imm[12] (sign), op[7] -> imm[11],
+-- op[30:25] -> imm[10:5], op[11:8] -> imm[4:1].
+-- The sign must come from imm[12]; imm[11] only equals it for offsets that
+-- fit in 12 bits, so testing imm[11] breaks branches beyond +-2 KiB.
+local function parse_B(opcode)
+  local sign = band(rshift(opcode, 31), 1) --12
+  local part1 = band(rshift(opcode, 7), 1) --11
+  local part2 = band(rshift(opcode, 25), 63) --10:5
+  local part3 = band(rshift(opcode, 8), 15) -- 4 : 1
+  if (sign == 1) then
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
+      lshift(part2, 5), lshift(part3, 1), 0)
+  else
+    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5),
+      lshift(part3, 1), 0)
+  end
+end
+
+-- Signed 9 bit offset: op[12] -> imm[8] (sign), op[6:5] -> imm[7:6],
+-- op[2] -> imm[5], op[11:10] -> imm[4:3], op[4:3] -> imm[2:1].
+local function parse_q(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --8
+  local part2 = band(rshift(opcode, 5), 3) --7:6
+  local part3 = band(rshift(opcode, 2), 1) --5
+  local part4 = band(rshift(opcode, 10), 3) --4:3
+  local part5 = band(rshift(opcode, 3), 3) --2:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6),
+      lshift(part3, 5), lshift(part4, 3), lshift(part5, 1))
+  else
+    return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5),
+      lshift(part4, 3), lshift(part5, 1))
+  end
+end
+
+-- Signed 21 bit jump offset: op[31] -> imm[20] (sign), op[19:12] ->
+-- imm[19:12], op[20] -> imm[11], op[30:21] -> imm[10:1].
+local function parse_J(opcode)
+  local part1 = band(rshift(opcode, 31), 1) --20
+  local part2 = band(rshift(opcode, 12), 255) -- 19:12
+  local part3 = band(rshift(opcode, 20), 1) --11
+  local part4 = band(rshift(opcode, 21), 1023) --10:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12),
+      lshift(part3, 11), lshift(part4, 1))
+  else
+    return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12),
+      lshift(part3, 11), lshift(part4, 1))
+  end
+end
+
+-- Signed 12 bit jump offset scattered over op[12:2]; op[12] is the sign.
+local function parse_T(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --11
+  local part2 = band(rshift(opcode, 8), 1) --10
+  local part3 = band(rshift(opcode, 9), 3)--9:8
+  local part4 = band(rshift(opcode, 6), 1) --7
+  local part5 = band(rshift(opcode, 7), 1) -- 6
+  local part6 = band(rshift(opcode, 2), 1) --5
+  local part7 = band(rshift(opcode, 11), 1) --4
+  local part8 = band(rshift(opcode, 3), 7) --3:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
+      lshift(part2, 10), lshift(part3, 8), lshift(part4, 7),
+      lshift(part5, 6), lshift(part6, 5), lshift(part7, 4),
+      lshift(part8, 1))
+  else
+    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10),
+      lshift(part3, 8), lshift(part4, 7), lshift(part5, 6),
+      lshift(part6, 5), lshift(part7, 4), lshift(part8, 1))
+  end
+end
+
+-- 6 bit immediate from op[12] and op[6:2], extended to a 20 bit value
+-- (bits 19:5 are filled with ones when op[12] is set).
+local function parse_K(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --5 17
+  local part2 = band(rshift(opcode, 2), 31) --4:0 16:12
+  if(part1 == 1) then
+    return bor(lshift(0, 31), lshift(0x7fff, 5), part2)
+  else
+    return bor(lshift(0, 31), lshift(part1, 5), part2)
+  end
+end
+
+-- Disassemble a single instruction.
+-- Fetches the instruction via ctx:get(), looks up its pattern string in
+-- ctx.map_compr (low two bits of op not 11) or ctx.map_pri, then builds
+-- the operand list by interpreting the pattern one character at a time.
+-- The result is printed with putop(); undecodable instructions go through
+-- unknown().
+local function disass_ins(ctx)
+  local op = ctx:get()
+  local operands = {}
+  local last = nil
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat
+  if band(op, 3) < 3 then
+    --for compressed instructions
+    opat = ctx.map_compr[band(op, 3)]
+  else
+    opat = ctx.map_pri[band(op, 127)]
+  end
+  -- Walk the nested decode tables until a pattern string is reached.
+  while type(opat) ~= "string" do
+    if not opat then return unknown(ctx) end
+    opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then
+    pat = pat2
+  end
+
+  local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      x = map_gpr[band(rshift(op, 7), 31)]
+    elseif p == "F" then
+      x = map_fgpr[band(rshift(op, 7), 31)]
+    elseif p == "R" then
+      x = map_gpr[band(rshift(op, 15), 31)]
+    elseif p == "G" then
+      x = map_fgpr[band(rshift(op, 15), 31)]
+    elseif p == "r" then
+      x = map_gpr[band(rshift(op, 20), 31)]
+      if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then
+        local temp = last --because of the different order of the characters
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "g" then
+      x = map_fgpr[band(rshift(op, 20), 31)]
+      if(name == "fsw" or name == "fsd") then
+        local temp = last
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "Z" then
+      x = map_gpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "N" then
+      x = map_fgpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "M" then
+      x = map_gpr[8 + band(rshift(op, 7), 7)]
+    elseif p == "E" then
+      x = map_gpr[band(rshift(op, 2), 31)]
+    elseif p == "W" then
+      local uimm = parse_W(op)
+      x = format("%s,%d", "sp", uimm)
+    elseif p == "x" then
+      x = parse_x(op)
+    elseif p == "h" then
+      local part1 = band(rshift(op, 5), 3) --7:6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3))
+      operands[#operands] = format("%d(%s)", uimm, last)
+    elseif p == "X" then
+      local imm = parse_X(op)
+      x = format("%s,%d", "sp", imm)
+    elseif p == "O" then
+      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
+    elseif p == "H" then
+      x = map_fgpr[band(rshift(op, 27), 31)]
+    elseif p == "L" then
+      local register = map_gpr[band(rshift(op, 15), 31)]
+      local disp = arshift(op, 20)
+      x = format("%d(%s)", disp, register)
+    elseif p == "P" then -- XTheadMemPair
+      local register = map_gpr[band(rshift(op, 15), 31)]
+      local disp = band(arshift(op, 25), 3)
+      -- The last operand is the fixed shift: 4 for the doubleword pairs
+      -- (th.ldd/th.sdd), 3 for the word pairs. Per the XTheadMemPair
+      -- encoding only the doubleword forms have op[27] set; op[26:25] is
+      -- the imm2 field printed as disp. The previous "isword and 3 or 4"
+      -- always yielded 3 because 0 is truthy in Lua.
+      -- NOTE(review): the decode tables select these mnemonics with
+      -- shift = 25; verify them against the same encoding.
+      local isdword = band(rshift(op, 27), 1) == 1
+      x = format("(%s), %d, %d", register, disp, isdword and 4 or 3)
+    elseif p == "I" then
+      x = arshift(op, 20)
+      --different for jalr
+      if(name == "jalr") then
+        local reg = map_gpr[band(rshift(op, 15), 31)]
+        if(ctx.reltab[reg] == nil) then
+          operands[#operands] = format("%d(%s)", x, last)
+        else
+          local target = ctx.reltab[reg] + x
+          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
+          ctx.rel = target
+          ctx.reltab[reg] = nil --assume no reuses of the register
+        end
+        x = nil --not to add additional operand
+      end
+    elseif p == "i" then
+      --both for RV32I AND RV64I
+      local value = band(arshift(op, 20), 63)
+      x = string.format("%d", value)
+    elseif p == "j" then -- XThead imm1[31..26]
+      local value = band(rshift(op, 26), 63)
+      x = string.format("%d", value)
+    elseif p == "v" then --XThead imm[2][26..25]
+      local value = band(rshift(op, 25), 3)
+      x = string.format("%d", value)
+    elseif p == "S" then
+      local register = map_gpr[band(rshift(op, 15), 31)] --register
+      local imm = parse_S(op)
+      x = format("%d(%s)", imm, register)
+    elseif p == "n" then
+      local part1 = band(rshift(op, 5), 1) --6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local part3 = band(rshift(op, 6), 1) --2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3),
+        lshift(part3, 2))
+      operands[#operands] = format("%d(%s)", uimm, last)
+    elseif p == "A" then
+      local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)]
+      -- Remember the address formed here so a later jalr through the same
+      -- register can be annotated with its target.
+      ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12)
+      x = format("0x%x", value)
+    elseif p == "B" then
+      x = ctx.addr + ctx.pos + parse_B(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "U" then
+      local value = band(rshift(op, 12), 0xfffff)
+      x = string.format("0x%x", value)
+    elseif p == "Q" then
+      local part1 = band(rshift(op, 2), 7) --8:6
+      local part2 = band(rshift(op, 12), 1) --5
+      local part3 = band(rshift(op, 5), 3) --4:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
+        lshift(part3, 3))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "q" then
+      x = ctx.addr + ctx.pos + parse_q(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "J" then
+      x = ctx.addr + ctx.pos + parse_J(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "K" then
+      local value = parse_K(op)
+      x = string.format("0x%x", value)
+    elseif p == "Y" then
+      local part1 = band(rshift(op, 2), 3) --7:6
+      local part2 = band(rshift(op, 12), 1) --5
+      local part3 = band(rshift(op, 4), 7) --4:2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5),
+        lshift(part3, 2))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "o" then -- rounding mode
+      x = map_rm[band(rshift(op, 12), 7)]
+    elseif p == "y" then -- fli lut
+      x = map_fli[band(rshift(op, 15), 31)]
+    elseif p == "1" then
+      local part1 = band(rshift(op, 12), 1) --5
+      local part2 = band(rshift(op, 2), 31) --4:0
+      local uimm = bor(lshift(0, 31), lshift(part1, 5), part2)
+      x = string.format("0x%x", uimm)
+    elseif p == "T" then
+      x = ctx.addr + ctx.pos + parse_T(op)
+      ctx.rel = x
+      x = format("0x%08x", x)
+    elseif p == "t" then
+      local part1 = band(rshift(op, 7), 7) --8:6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "u" then
+      local part1 = band(rshift(op, 7), 3) --7:6
+      local part2 = band(rshift(op, 9), 15) --5:2
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2))
+      x = format("%d(%s)", uimm, "sp")
+    elseif p == "V" then
+      x = map_fgpr[band(rshift(op, 2), 31)]
+    elseif p == "0" then --PSEUDOINSTRUCTIONS
+      -- Drop a trailing zero/x0 operand and switch to the next alias name.
+      if (last == "zero" or last == 0) then
+        local n = #operands
+        operands[n] = nil
+        last = operands[n-1]
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
+        if a1 then name, altname = a1, a2
+        else name = altname end
+        alias_done = true
+      end
+    elseif (p == "4") then
+      if(last == -1) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "5") then
+      if(last == 1) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "6") then
+      if(last == operands[#operands - 1]) then
+        name = altname
+        operands[#operands] = nil
+      end
+    elseif (p == "7") then --jalr rs
+      -- Only an operand of the exact form "0(reg)" is collapsed to "reg".
+      -- An operand that carries a " #0x..." target annotation is left
+      -- untouched instead of being cut at fixed character positions.
+      local reg = match(operands[#operands], "^0%((.-)%)$")
+      local rd = operands[#operands - 1]
+      if reg and (rd == "ra" or rd == "zero") then
+        if rd == "zero" then
+          name = altname
+        end
+        operands[#operands] = nil
+        operands[#operands] = reg
+      end
+    elseif (p == "2" and alias_done == false) then
+      if (last == "zero" or last == 0) then
+        local a1, a2 = match(altname, "([^|]*)|(.*)")
+        name = a2
+        operands[#operands] = nil
+      end
+    end
+    if x then operands[#operands+1] = x; last = x end
+  end
+  return putop(ctx, name, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+-- ofs is a byte offset into ctx.code (default 0) and len a byte count
+-- (default: up to the end of ctx.code). Both ends are rounded down to an
+-- even value before decoding starts.
+local function disass_block(ctx, ofs, len)
+  if not ofs then
+    ofs = 0
+  end
+  local stop = len and ofs+len or #ctx.code
+  --instructions can be both 32 and 16 bits
+  stop = stop - stop % 2
+  ctx.pos = ofs - ofs % 2
+  ctx.rel = nil
+  while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+-- addr is the address printed for offset 0 (default 0); out receives each
+-- formatted line (default io.write).
+local function create(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  ctx.addr = addr or 0
+  ctx.out = out or io.write
+  ctx.symtab = {}
+  ctx.disass = disass_block
+  ctx.hexdump = 8
+  ctx.get = get_le
+  ctx.map_pri = map_pri
+  ctx.map_compr = map_compr
+  ctx.reltab = {}
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass(code, addr, out)
+  -- addr is only the display base address. It must not be passed on as the
+  -- byte offset into code, otherwise a non-zero addr skips (or empties)
+  -- the output.
+  create(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname(r)
+  if r < 32 then return map_gpr[r] end
+  return "f"..(r-32)
+end
+
+-- Public module functions.
+return {
+  create = create,
+  disass = disass,
+  regname = regname
+}
diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua
new file mode 100644
index 000000000..fd6ce2768
--- /dev/null
+++ b/src/jit/dis_riscv64.lua
@@ -0,0 +1,16 @@
+----------------------------------------------------------------------------
+-- LuaJIT RISC-V 64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2025 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the default riscv little-endian functions from the
+-- RISC-V disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------ + +local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") +return { + create = dis_riscv.create, + disass = dis_riscv.disass, + regname = dis_riscv.regname +} \ No newline at end of file diff --git a/src/lib_jit.c b/src/lib_jit.c index d1f0213ae..1f5c81d1d 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -697,6 +697,104 @@ JIT_PARAMDEF(JIT_PARAMINIT) #include #endif +#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX +#include + +struct riscv_hwprobe hwprobe_requests[] = { + {RISCV_HWPROBE_KEY_IMA_EXT_0} +}; + +const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; + +int hwprobe_ret = 0; +#endif + +static int riscv_compressed() +{ +#if defined(__riscv_c) || defined(__riscv_compressed) + /* Don't bother checking for RVC -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_zba() +{ +#if defined(__riscv_b) || defined(__riscv_zba) + /* Don't bother checking for Zba -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_zbb() +{ +#if defined(__riscv_b) || defined(__riscv_zbb) + /* Don't bother checking for Zbb -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_zicond() +{ +#if defined(__riscv_zicond) + /* Don't bother checking for Zicond -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 
1 : 0; +#else + return 0; +#endif +} + +static int riscv_zfa() +{ +#if defined(__riscv_zfa) + /* Don't bother checking for Zfa -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_xthead() +{ +#if (defined(__riscv_xtheadba) \ + && defined(__riscv_xtheadbb) \ + && defined(__riscv_xtheadcondmov) \ + && defined(__riscv_xtheadmac)) + /* Don't bother checking for XThead -- would crash before getting here. */ + return 1; +#else +/* +** Hardcoded as there's no easy way of detection: +** - SIGILL have some trouble with libluajit as we speak +** - Checking mvendorid looks good, but might not be reliable. +*/ + return 0; +#endif +} + +static uint32_t riscv_probe(int (*func)(void), uint32_t flag) +{ + return func() ? flag : 0; +} +#endif + /* Arch-dependent CPU feature detection. */ static uint32_t jit_cpudetect(void) { @@ -769,6 +867,29 @@ static uint32_t jit_cpudetect(void) #endif #elif LJ_TARGET_S390X /* No optional CPU features to detect (for now). */ + +#elif LJ_TARGET_RISCV64 +#if LJ_HASJIT + +#if LJ_TARGET_LINUX + /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */ + hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, + sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, + NULL, 0); + + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); + flags |= riscv_probe(riscv_zba, JIT_F_RVZba); + flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); + flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); + flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + +#endif + + /* Detect V/P? */ + /* V have no hardware available, P not ratified yet. 
*/
+#endif
+
+#else
 #error "Missing CPU detection for this architecture"
 #endif
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index cb704f7b3..9039d8053 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
 #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
 #define CALL_MREMAP_NOMOVE 0
 #define CALL_MREMAP_MAYMOVE 1
-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64)
 #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
 #else
 #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 112c23269..b85d29e16 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -33,6 +33,8 @@
 #define LUAJIT_ARCH_mips64 7
 #define LUAJIT_ARCH_S390X 8
 #define LUAJIT_ARCH_s390x 8
+#define LUAJIT_ARCH_RISCV64 9
+#define LUAJIT_ARCH_riscv64 9
 /* Target OS. */
 #define LUAJIT_OS_OTHER 0
@@ -69,6 +71,8 @@
 #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
 #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
+#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64
+#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64
 #else
 #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
 #endif
@@ -470,6 +474,20 @@
 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
 #define LJ_TARGET_GC64 1
 #define LJ_ARCH_NOJIT 1 /* NYI */
+#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64
+
+#define LJ_ARCH_NAME "riscv64"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */
+#define LJ_TARGET_RISCV64 1
+#define LJ_TARGET_GC64 1
+#define LJ_TARGET_EHRETREG 10
+#define LJ_TARGET_EHRAREG 1
+#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\
+ AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
 #else
 #error "No target architecture defined"
@@ -554,6 +572,10 @@
 #error "Only n64 ABI supported for MIPS64"
 #undef LJ_TARGET_MIPS
 #endif
+#elif LJ_TARGET_RISCV64
+#if !defined(__riscv_float_abi_double)
+#error "Only RISC-V 64 double float supported for now"
+#endif
 #endif
 #endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 33287af1e..06c813c50 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs)
 #include "lj_emit_ppc.h"
 #elif LJ_TARGET_MIPS
 #include "lj_emit_mips.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_emit_riscv.h"
 #else
 #error "Missing instruction emitter for target CPU"
 #endif
@@ -1710,6 +1712,8 @@ static void asm_loop(ASMState *as)
 #include "lj_asm_mips.h"
 #elif LJ_TARGET_S390X
 #include "lj_asm_s390x.h"
+#elif LJ_TARGET_RISCV64
+#include "lj_asm_riscv64.h"
 #else
 #error "Missing assembler for target CPU"
 #endif
diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h
new file mode 100644
index 000000000..2ee63fa10
--- /dev/null
+++ b/src/lj_asm_riscv64.h
@@ -0,0 +1,2037 @@
+/*
+** RISC-V IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by gns from PLCT Lab, ISCAS.
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+/*
+** If ref has no register yet, the hint is recorded on it (unless it already
+** has one or iscrossref() holds) and a register is allocated from allow.
+** ra_noweak() is applied to the result in both cases.
+*/
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!ra_hashint(r) && !iscrossref(as, ref))
+      ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+    r = ra_allocref(as, ref, allow);
+  }
+  ra_noweak(as, r);
+  return r;
+}
+
+/* Allocate a register or RID_ZERO.
*/
+/*
+** Returns RID_ZERO without allocating when ref has no register, allow has
+** no FPR bits and ref is a constant whose value is 0.
+*/
+static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
+{
+  Reg r = IR(ref)->r;
+  if (ra_noreg(r)) {
+    if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
+      return RID_ZERO;
+    r = ra_allocref(as, ref, allow);
+  } else {
+    ra_noweak(as, r);
+  }
+  return r;
+}
+
+/* Allocate two source registers for three-operand instructions. */
+/*
+** Returns both registers packed as left | (right << 8). An operand that
+** already has a register keeps it; the other one is allocated with that
+** register excluded from allow. Either may come back as RID_ZERO via
+** ra_alloc1z().
+*/
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  Reg left = irl->r, right = irr->r;
+  if (ra_hasreg(left)) {
+    ra_noweak(as, left);
+    if (ra_noreg(right))
+      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+    else
+      ra_noweak(as, right);
+  } else if (ra_hasreg(right)) {
+    ra_noweak(as, right);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else if (ra_hashint(right)) {
+    /* Right operand carries a hint: allocate it first. */
+    right = ra_alloc1z(as, ir->op2, allow);
+    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
+  } else {
+    left = ra_alloc1z(as, ir->op1, allow);
+    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
+  }
+  return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */
+#define RISCV_SPAREJUMP 4
+
+/* Setup spare long-range jump (trampoline?) slots per mcarea.
*/
+
+/*
+** Only acts when mctop is exactly at the end of the current mcarea: fills
+** RISCV_SPAREJUMP*2 instruction words below it with EBREAK and lowers mctop.
+*/
+static void asm_sparejump_setup(ASMState *as)
+{
+  MCode *mxp = as->mctop;
+  if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
+    for (int i = RISCV_SPAREJUMP*2; i--; )
+      *--mxp = RISCVI_EBREAK;
+    as->mctop = mxp;
+  }
+}
+
+/*
+** Find or create an AUIPC+JALR pair jumping to target in the spare slots at
+** the end of mcarea. Scans the two-word slots from the top down: returns a
+** slot that already holds the pair for target, or claims the first slot that
+** still starts with EBREAK. Returns NULL if all RISCV_SPAREJUMP slots hold
+** other jumps.
+*/
+static MCode *asm_sparejump_use(MCode *mcarea, MCode *target)
+{
+  MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
+  int slot = RISCV_SPAREJUMP;
+  RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr;
+  while (slot--) {
+    mxp -= 2;
+    /* The pair is PC-relative, so it is recomputed for every slot. */
+    ptrdiff_t delta = (char *)target - (char *)mxp;
+    tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)),
+    tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+    if (mxp[0] == tauipc && mxp[1] == tjalr) {
+      return mxp;
+    } else if (mxp[0] == tslot) {
+      mxp[0] = tauipc, mxp[1] = tjalr;
+      return mxp;
+    }
+  }
+  return NULL;
+}
+
+/* Setup exit stub after the end of each trace. */
+/*
+** The stub is written backwards from mctop: first one JAL per exit (highest
+** exit number at the highest address), then the shared four-instruction
+** header below them. as->mctop ends up pointing at the header.
+*/
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+  ExitNo i;
+  MCode *mxp = as->mctop;
+  if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
+    asm_mclimit(as);
+  for (i = nexits-1; (int32_t)i >= 0; i--)
+    *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i)));
+  /* The AUIPC ends up three words below the first JAL, hence mxp-3. */
+  ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3);
+  /* 1: sd ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */
+  *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno);
+  *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP)
+	 | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta));
+  *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP)
+	 | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta));
+  *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP);
+  as->mctop = mxp;
+}
+
+/* Address of the JAL for exitno: it follows the four header words. */
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+  /* Keep this in-sync with exitstub_trace_addr(). */
+  return as->mctop + exitno + 4;
+}
+
+/* Emit conditional branch to exit for guard.
*/
+/*
+** Writes two instructions below as->mcp: a branch on rs1/rs2 with the
+** funct3 bit 0 of riscvi flipped and an immediate of 8, followed by a JAL
+** to the exit stub of the current snapshot. If as->mcp equals as->invmcp
+** (loop inversion point), an extra JAL to the exit is written first, the
+** condition is flipped once more and as->loopinv is set.
+*/
+static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2)
+{
+  MCode *target = asm_exitstub_addr(as, as->snapno);
+  MCode *p = as->mcp;
+  if (LJ_UNLIKELY(p == as->invmcp)) {
+    as->loopinv = 1;
+    as->mcp = ++p;
+    *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p);
+    riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */
+    target = p - 1; /* Patch target later in asm_loop_fixup. */
+  }
+  ptrdiff_t delta = (char *)target - (char *)(p - 1);
+  *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
+  *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
+  as->mcp = p;
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+/* Returns 1 if none was found, 0 on a conflict or if ref is too far away. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+  IRIns *ir = as->ir;
+  IRRef i = as->curins;
+  if (i > ref + CONFLICT_SEARCH_LIM)
+    return 0; /* Give up, ref is too far away. */
+  while (--i > ref)
+    if (ir[i].o == conflict)
+      return 0; /* Conflict found. */
+  return 1; /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+/* Returns sizeof(GCtab) as the offset to fuse, or 0 if fusion is not ok. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+  IRIns *ir = IR(ref);
+  if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+      !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+    return (int32_t)sizeof(GCtab);
+  return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+/*
+** Returns the base register and stores the offset in *ofsp. Fusion is only
+** done if ref has no register and the offset passes checki12(); otherwise
+** *ofsp is 0 and ref itself is allocated from allow.
+*/
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  if (ra_noreg(ir->r)) {
+    if (ir->o == IR_AREF) {
+      if (mayfuse(as, ref)) {
+	if (irref_isk(ir->op2)) {
+	  IRRef tab = IR(ir->op1)->op1;
+	  int32_t ofs = asm_fuseabase(as, tab);
+	  IRRef refa = ofs ? tab : ir->op1;
+	  ofs += 8*IR(ir->op2)->i;
+	  if (checki12(ofs)) {
+	    *ofsp = ofs;
+	    return ra_alloc1(as, refa, allow);
+	  }
+	}
+      }
+    } else if (ir->o == IR_HREFK) {
+      if (mayfuse(as, ref)) {
+	int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+	if (checki12(ofs)) {
+	  *ofsp = ofs;
+	  return ra_alloc1(as, ir->op1, allow);
+	}
+      }
+    } else if (ir->o == IR_UREFC) {
+      if (irref_isk(ir->op1)) {
+	GCfunc *fn = ir_kfunc(IR(ir->op1));
+	GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+	/* Offset of the upvalue's TValue relative to the global state. */
+	intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g));
+	if (checki12(ofs)) {
+	  *ofsp = (int32_t)ofs;
+	  return RID_GL;
+	}
+      }
+    } else if (ir->o == IR_TMPREF) {
+      *ofsp = (int32_t)offsetof(global_State, tmptv);
+      return RID_GL;
+    }
+  }
+  *ofsp = 0;
+  return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+/*
+** Emits the load/store riscvi for rd. A constant ADD operand or a STRREF is
+** folded into the 12 bit offset where possible. A STRREF whose offset does
+** not pass checki12() is addressed through RID_TMP with an explicit ADD.
+*/
+static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref,
+			 RegSet allow, int32_t ofs)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    intptr_t ofs2;
+    if (ir->o == IR_ADD) {
+      if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
+				 checki12(ofs2))) {
+	ref = ir->op1;
+	ofs = (int32_t)ofs2;
+      }
+    } else if (ir->o == IR_STRREF) {
+      ofs2 = 4096; /* Fails checki12() unless replaced by a constant below. */
+      lj_assertA(ofs == 0, "bad usage");
+      ofs = (int32_t)sizeof(GCstr);
+      if (irref_isk(ir->op2)) {
+	ofs2 = ofs + get_kval(as, ir->op2);
+	ref = ir->op1;
+      } else if (irref_isk(ir->op1)) {
+	ofs2 = ofs + get_kval(as, ir->op1);
+	ref = ir->op2;
+      }
+      if (!checki12(ofs2)) {
+	/* NYI: Fuse ADD with constant. */
+	Reg right, left = ra_alloc2(as, ir, allow);
+	right = (left >> 8); left &= 255;
+	emit_lso(as, riscvi, rd, RID_TMP, ofs);
+	emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right);
+	return;
+      }
+      ofs = ofs2;
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_lso(as, riscvi, rd, base, ofs);
+}
+
+/* Fuse Integer multiply-accumulate.
*/
+
+/*
+** Tries to fold a MUL operand of ir into the single instruction riscvi.
+** Returns 1 if the fused instruction was emitted, 0 if the caller has to
+** assemble ir itself. The non-MUL operand is hinted to the destination and
+** moved there with emit_mv() if it ended up in a different register.
+*/
+static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+	ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+	(rref = lref, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_GPR);
+    Reg left = ra_alloc2(as, irm,
+			 rset_exclude(rset_exclude(RSET_GPR, dest), add));
+    Reg right = (left >> 8); left &= 255;
+    emit_ds1s2(as, riscvi, dest, left, right);
+    if (dest != add) emit_mv(as, dest, add);
+    return 1;
+  }
+  return 0;
+}
+
+/* Fuse FP multiply-add/sub. */
+
+/*
+** Same scheme as asm_fusemac() for FP, gated by JIT_F_OPT_FMA. riscvi is
+** used when the MUL is the left operand, riscvir when it is the right one.
+** Returns 1 if fused, 0 otherwise.
+*/
+static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  if ((as->flags & JIT_F_OPT_FMA) &&
+      lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+	ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+	(rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg left = ra_alloc2(as, irm,
+			 rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;
+    emit_ds1s2s3(as, riscvi, dest, left, right, add);
+    return 1;
+  }
+  return 0;
+}
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+/*
+** Emits the call itself (unless ci->func is NULL) and then places every
+** non-zero args[] entry: FP values in FPR argument registers, everything
+** else in GPR argument registers, and the overflow in 8 byte stack slots
+** starting at offset 0. For CCI_VARARG calls the FPR and GPR counters
+** advance together, and an FP value never falls back to a GPR.
+*/
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func, 1);
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) { /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    if (ref) {
+      if (irt_isfp(ir->t)) {
+	if (fpr <= REGARG_LASTFPR) {
+	  lj_assertA(rset_test(as->freeset, fpr),
+		     "reg %d not free", fpr); /* Must have been evicted. */
+	  ra_leftov(as, fpr, ref);
+	  fpr++; if(ci->flags & CCI_VARARG) gpr++;
+	} else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+		     "reg %d not free", gpr); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++;
+	} else {
+	  Reg r = ra_alloc1(as, ref, RSET_FPR);
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      } else {
+	if (gpr <= REGARG_LASTGPR) {
+	  lj_assertA(rset_test(as->freeset, gpr),
+		     "reg %d not free", gpr); /* Must have been evicted. */
+	  ra_leftov(as, gpr, ref);
+	  gpr++; if(ci->flags & CCI_VARARG) fpr++;
+	} else {
+	  Reg r = ra_alloc1z(as, ref, RSET_GPR);
+	  emit_spstore(as, ir, r, ofs);
+	  ofs += 8;
+	}
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+/*
+** All scratch registers except the destination register(s) of ir (and of a
+** following HIOP) are evicted. A used result is then bound to RID_RET,
+** RID_FPRET, or a register pair. With CCI_CASTU64 an FP result is moved
+** from RID_RET into an FPR instead.
+*/
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r); /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
+  ra_evictset(as, drop); /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
+    if (irt_isfp(ir->t)) {
+      if ((ci->flags & CCI_CASTU64)) {
+	Reg dest = ra_dest(as, ir, RSET_FPR);
+	emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X,
+		dest, RID_RET);
+      } else {
+	ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+}
+
+/*
+** Assemble a CALLX. A constant function address is passed to asm_gencall()
+** through ci.func. For an indirect call the JALR (preceded by a move into
+** RID_CFUNCADDR) is written here and ci.func is left NULL so that
+** asm_gencall() emits no call of its own.
+*/
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) { /* Call to constant address. */
+    ci.func = (ASMFunction)(void *)get_kval(as, func);
+  } else { /* Need specific register for indirect calls. */
+    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
+    MCode *p = as->mcp;
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r);
+    if (r == RID_CFUNCADDR)
+      *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
+    else
+      *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r);
+    as->mcp = p;
+    ci.func = (ASMFunction)(void *)0;
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+/*
+** NOTE(review): the emit_* calls below appear in the reverse of execution
+** order, matching the backwards `*--p` stores used elsewhere in this file --
+** confirm against lj_emit_riscv.h.
+*/
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guard(as, RISCVI_BNE, RID_TMP,
+	    ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, RISCVI_LD, RID_TMP, base, -8);
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+/*
+** Rewrites the L field of the SBuf addressed by sb: the old value is masked
+** with SBUF_MASK_FLAG, OR-ed with cur_L from the global state and stored
+** back.
+*/
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp);
+  emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+/*
+** Checked double -> int32 conversion of left: converts to int, converts the
+** result back, compares both doubles with FEQ.D and guards on the
+** comparison result being zero.
+*/
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest));
+  asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO);
+  emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left);
+  emit_ds(as, RISCVI_FCVT_D_W, tmp, dest);
+  emit_ds(as, RISCVI_FCVT_W_D, dest, left);
+}
+
+/*
+** TOBIT: adds op1 and op2 as doubles into a scratch FPR and moves the low
+** 32 bits of the sum into the integer destination.
+*/
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_FPR;
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  emit_ds(as, RISCVI_FMV_X_W, dest, tmp);
+  emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right);
+}
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+  /* Use GPR to pass floating-point arguments */
+  if (irt_isfp(ir->t) && ir->r
>= RID_X10 && ir->r <= RID_X17) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg ftmp = ra_scratch(as, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + ftmp, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); + emit_ds(as, riscvi, ftmp, left); + } + } else if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + dest, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, riscvi, dest, left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + RISCVIns riscvi = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? 
RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) : + (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) : + (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S)); + emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : + st == IRT_U8 ? RISCVI_ZEXT_B : + st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + emit_ext(as, riscvi, dest, left); + } else { /* 32/64 bit integer conversions. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_ext(as, RISCVI_SEXT_W, dest, left); + } else { /* 32 to 64 bit zero extension. */ + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } + } + } else { + if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = SPOFS_TMP; + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). 
*/ + ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); + asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); + emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new +{ + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + return; + } + emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0); + } else { + asm_tvstore64(as, dest, 0, ref); + } + } + /* g->tmptv holds the TValue(s). 
*/ + emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki12(ofs)) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dsi(as, RISCVI_ADDI, dest, base, ofs); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_sh3add(as, dest, base, idx, RID_TMP); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; + Reg cmp64 = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = 
ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + if (!irt_ispri(kt)) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + int is_lend_exit = 0; + as->invmcp = NULL; + if (merge == IR_NE) + asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_mv(as, dest, tmp1); + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) { /* Must match asm_guard(). */ + l_end = asm_exitstub_addr(as, as->snapno); + is_lend_exit = 1; + } + if (irt_isnum(kt)) { + emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); + emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); + emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1); + emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); + emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); + } else { + emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); + } + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) + | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); + rset_clear(allow, type); + } + + /* Load main position relative to tab->node into dest. */ + khash = isk ? 
ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; + if (isk) + tmphash = ra_allock(as, khash, allow); + /* node = tab->node + (idx*32-idx*8) */ + emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); + lj_assertA(sizeof(Node) == 24, "bad Node size"); + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); + emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); + emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); + if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { + emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); + emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); + emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); + emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); + emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); + if (irt_isnum(kt)) { + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo + emit_ds(as, RISCVI_FMV_X_D, tmp1, key); + } else { + checkmclim(as); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo + emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !checki12(kofs); + Reg dest = (ra_used(ir) 
|| bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_dsi(as, RISCVI_ADDI, dest, node, ofs); + } + if (irt_ispri(irkey->t)) { + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); + if (bigofs) + emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); + } else { + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? 
RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); + if (ir->o == IR_UREFC) + emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); + } else { + emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { + emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); + } else { + emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); + emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static RISCVIns asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return RISCVI_LB; + case IRT_U8: return RISCVI_LBU; + case IRT_I16: return RISCVI_LH; + case IRT_U16: return RISCVI_LHU; + case IRT_NUM: return RISCVI_FLD; + case IRT_FLOAT: return RISCVI_FLW; + default: return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW; + } +} + +static RISCVIns asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return RISCVI_SB; + case IRT_I16: case IRT_U16: return RISCVI_SH; + case IRT_NUM: return RISCVI_FSD; + case IRT_FLOAT: return RISCVI_FSW; + default: return irt_is64(ir->t) ? 
RISCVI_SD : RISCVI_SW; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, dest = ra_dest(as, ir, allow); + rset_clear(allow, dest); + RISCVIns riscvi = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, allow); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); + return; + } + } + ofs = field_ofs[ir->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + } + rset_clear(allow, idx); + emit_lso(as, riscvi, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); + emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); +} + +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } +} + +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; + IRType1 t = ir->t; + if (ra_used(ir)) { + lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + if (irt_isaddr(t)) { + emit_cleartp(as, dest, dest); + } else if (irt_isint(t)) + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + rset_clear(allow, idx); + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); + } else { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + if (ra_hasreg(dest)) { + if (irt_isnum(t)) { + emit_lso(as, RISCVI_FLD, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); + emit_lso(as, RISCVI_LD, dest, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_lso(as, RISCVI_FSD, src, idx, ofs); + } else { + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + 
emit_lso(as, RISCVI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); + emit_ext(as, RISCVI_ZEXT_W, tmp, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, src, type); + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; + IRType1 t = ir->t; + int32_t ofs = 8*((int32_t)ir->op1-2); + lj_assertA(checki12(ofs), "sload IR operand out of range"); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); + if (irt_isaddr(t)) { /* Clear type from pointers. */ + emit_cleartp(as, dest, dest); + } else if (ir->op2 & IRSLOAD_CONVERT) { + if (irt_isint(t)) { + emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_ds(as, RISCVI_FMV_D_X, tmp, dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. 
*/ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. */ + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); + emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); + } else { + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); + if (ra_hasreg(dest)) { + emit_lso(as, RISCVI_FLD, dest, base, ofs); + } + } else { + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); + } + emit_lso(as, RISCVI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? RISCVI_FLD : + irt_isint(t) ? RISCVI_LW : RISCVI_LD, + dest, base, ofs); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + if (ra_used(ir)) + ra_destreg(as, ir, RID_RET); /* GCcdata * */ + + /* Initialize immutable cdata object. 
*/ + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), + RID_RET, (sizeof(GCcdata))); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); + emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); + emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); + emit_loadk32(as, RID_TMP, id); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg link = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); // make tab gray again + emit_getgl(as, link, gc.grayagain); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: not jump + emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); // mark=0: gray + emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); + emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); +} + +static void asm_obar(ASMState 
*as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: jump + emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, RISCVI_LBU, tmp, obj, + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); + emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, riscvi, dest, left, right); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + switch(riscvi) { + case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP: + case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP: + case RISCVI_FSQRT_S: case RISCVI_FSQRT_D: + emit_ds(as, riscvi, dest, left); + break; + case RISCVI_FMV_S: case RISCVI_FMV_D: + case RISCVI_FABS_S: case RISCVI_FABS_D: + case RISCVI_FNEG_S: case RISCVI_FNEG_D: + emit_ds1s2(as, riscvi, dest, 
left, left); + break; + default: + lj_assertA(0, "bad fp unary instruction"); + return; + } +} + +static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + MCLabel l_end = emit_label(as); + + if (dest != left) { + emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); + emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP); + } else { + Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); + emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left); + emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP); + } + emit_ds(as, riscvi, RID_TMP, left); + emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, 0); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); + emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52); + if (dest != left) + emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); + emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm <= IRFPM_TRUNC) + if (as->flags & JIT_F_RVZfa) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN : + fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ); + } else { + asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) : + fpm == IRFPM_CEIL ? 
RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) : + RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ)); + } + else if (fpm == IRFPM_SQRT) + asm_fpunary(as, ir, RISCVI_FSQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); +} + +static void asm_add(ASMState *as, IRIns *ir) +{ + IRType1 t = ir->t; + if (irt_isnum(t)) { + if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) + asm_fparith(as, ir, RISCVI_FADD_D); + return; + } else { + if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) + return; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checki12(k)) { + if (irt_is64(t)) { + emit_dsi(as, RISCVI_ADDI, dest, left, k); + } else { + emit_dsi(as, RISCVI_ADDIW, dest, left, k); + } + return; + } + } + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest, + left, right); + } +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) + asm_fparith(as, ir, RISCVI_FSUB_D); + return; + } else { + if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) + return; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, + left, right); + } +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, RISCVI_FMUL_D); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, irt_is64(ir->t) ? 
RISCVI_MUL : RISCVI_MULW, dest, + left, right); + } +} + +static void asm_fpdiv(ASMState *as, IRIns *ir) +{ + asm_fparith(as, ir, RISCVI_FDIV_D); +} + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, RISCVI_FNEG_D); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, + RID_ZERO, left); + } +} + +#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) + +static void asm_arithov(ASMState *as, IRIns *ir) +{ + Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); + lj_assertA(!irt_is64(ir->t), "bad usage"); + if (irref_isk(ir->op2)) { + int k = IR(ir->op2)->i; + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); + if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ + left = ra_alloc1(as, ir->op1, RSET_GPR); + asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); + emit_dsi(as, RISCVI_ADDI, dest, left, k); + if (dest == left) emit_mv(as, RID_TMP, left); + return; + } + } + left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), + right), dest)); + asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); + emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp); + if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ + emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); + } else { /* ((dest^left) & (dest^~right)) < 0 */ + emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right); + } + emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left); + emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right); + if (dest == left || dest == right) + emit_mv(as, RID_TMP, dest == left ? 
left : right); +} + +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + +static void asm_mulov(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, RISCVI_BNE, dest, RID_TMP); + emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend + emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] +} + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + Reg left, right, dest = ra_dest(as, ir, RSET_GPR); + IRIns *irl = IR(ir->op1); + if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { + left = ra_alloc2(as, irl, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, RISCVI_XNOR, dest, left, right); + } else { + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ds(as, RISCVI_NOT, dest, left); + } +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); + if (as->flags & JIT_F_RVZbb) { + if (!irt_is64(ir->t)) + emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); + emit_ds(as, RISCVI_REV8, dest, left); + } else if (as->flags & JIT_F_RVXThead) { + emit_ds(as, irt_is64(ir->t) ? 
RISCVI_TH_REV : RISCVI_TH_REVW, + dest, left); + } else if (irt_is64(ir->t)) { + Reg tmp1, tmp2, tmp3, tmp4; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); + tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); + tmp4 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); + emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); + emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); + emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); + emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); + emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); + emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); + emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); + emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); + emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); + emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); + emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); + } else { + Reg tmp1, tmp2; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); + emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); + emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); + emit_ds1s2(as, RISCVI_AND, RID_TMP, left, 
RID_TMP);
+    emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP);
+    emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24);
+    emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00));
+    emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u));
+    emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8);
+  }
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin)  /* riscvi: reg-reg op, riscvik: immediate op, riscvin: op used when one operand is a BNOT. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left, right;
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  if (irref_isk(ir->op2)) {
+    intptr_t k = get_kval(as, ir->op2);
+    if (checki12(k)) {  /* Constant passes checki12(): use the immediate form. */
+      left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+      emit_dsi(as, riscvik, dest, left, k);
+      return;
+    }
+  } else if (as->flags & JIT_F_RVZbb) {  /* Only reached when op2 is not a constant. */
+    if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) {
+      left = ra_alloc1(as, irl->op1, RSET_GPR);  /* Operand of the BNOT. */
+      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+      emit_ds1s2(as, riscvin, dest, right, left);  /* BNOT operand is passed as rs2. */
+      return;
+    } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) {
+      left = ra_alloc1(as, ir->op1, RSET_GPR);
+      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));  /* Operand of the BNOT. */
+      emit_ds1s2(as, riscvin, dest, left, right);  /* BNOT operand is passed as rs2. */
+      return;
+    }
+  }
+  left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+  emit_ds1s2(as, riscvi, dest, left, right);
+}
+
+#define asm_band(as, ir)	asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN)
+#define asm_bor(as, ir)	asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN)
+#define asm_bxor(as, ir)	asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR)
+
+static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik)  /* riscvi: register-count op, riscvik: constant-count op. */
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+  uint32_t shmsk = irt_is64(ir->t) ? 63 : 31;
+  if (irref_isk(ir->op2)) {  /* Constant shifts. 
*/
+    uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk);
+    switch (riscvik) {
+      case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI:
+      case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW:
+        emit_dsshamt(as, riscvik, dest, left, shift);
+        break;
+      case RISCVI_ADDI: shift = (-shift) & shmsk;  /* asm_brol passes ADDI as the ROLI marker: negate the count, then fall through to RORI. */
+      case RISCVI_RORI:
+        emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift);
+        break;
+      case RISCVI_ADDIW: shift = (-shift) & shmsk;  /* ADDIW is the ROLIW marker: negate the count, then fall through to RORIW. */
+      case RISCVI_RORIW:
+        emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift);
+        break;
+      default:
+        lj_assertA(0, "bad shift instruction");
+        return;
+    }
+  } else {
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    switch (riscvi) {
+      case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL:
+      case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW:
+        emit_ds1s2(as, riscvi, dest, left, right);
+        break;
+      case RISCVI_ROR: case RISCVI_ROL:
+      case RISCVI_RORW: case RISCVI_ROLW:
+        emit_rot(as, riscvi, dest, left, right, RID_TMP);  /* Rotates go through emit_rot(), which also gets RID_TMP. */
+        break;
+      default:
+        lj_assertA(0, "bad shift instruction");
+        return;
+    }
+  }
+}
+
+#define asm_bshl(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \
+  asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW))
+#define asm_bshr(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \
+  asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW))
+#define asm_bsar(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \
+  asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW))
+#define asm_brol(as, ir)	(irt_is64(ir->t) ? \
+  asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \
+  asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW))
+  /* ROLI -> ADDI, ROLIW -> ADDIW: ADDI/ADDIW only act as markers that asm_bitshift() maps to RORI/RORIW with a negated count. */
+#define asm_bror(as, ir)	(irt_is64(ir->t) ? 
\
+  asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \
+  asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW))
+
+static void asm_min_max(ASMState *as, IRIns *ir, int ismax)  /* MIN (ismax == 0) / MAX (ismax == 1) for FP and integer types. */
+{
+  if (irt_isnum(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    MCLabel l_ret_left, l_end;
+    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+    right = (left >> 8); left &= 255;  /* Unpack ra_alloc2() result: low byte = left, next byte = right. */
+    l_end = emit_label(as);
+
+    if (dest != left)
+      emit_ds1s2(as, RISCVI_FMV_D, dest, left, left);  /* Code at l_ret_left: result is left. */
+    l_ret_left = emit_label(as);
+
+    if (dest != left)
+      emit_jump(as, l_end, -1);  /* Skip over the "result is left" move. */
+    if (dest != right)
+      emit_ds1s2(as, RISCVI_FMV_D, dest, right, right);  /* Result is right. */
+
+    emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1);
+    emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left,
+               ismax ? left : right);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    if (as->flags & JIT_F_RVZbb) {
+      emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right);
+    } else {  /* No Zbb flag: select on the SLT result placed in RID_TMP (emitted at the end of this branch). */
+      if (as->flags & JIT_F_RVXThead) {
+        if (left == right) {
+          if (dest != left) emit_mv(as, dest, left);
+        } else {
+          if (dest == left) {
+            emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP);
+          } else {
+            emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP);
+            if (dest != right) emit_mv(as, dest, right);
+          }
+        }
+      } else if (as->flags & JIT_F_RVZicond) {
+        emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP);  /* Combine the two conditionally zeroed values. */
+        if (dest != right) {
+          emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP);
+          emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP);
+        } else {
+          emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP);
+          emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP);
+        }
+      } else {  /* Base ISA only: mask-and-XOR select. */
+        if (dest != right) {
+          emit_ds1s2(as, RISCVI_XOR, dest, right, dest);
+          emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP);
+          emit_ds1s2(as, RISCVI_XOR, dest, right, left);
+          emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1);  /* SLT result 1/0 -> mask 0/-1. */
+        } else {
+          emit_ds1s2(as, 
RISCVI_XOR, dest, left, dest);
+          emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP);
+          emit_ds1s2(as, RISCVI_XOR, dest, left, right);
+          emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP);  /* SLT result 1/0 -> mask -1/0. */
+        }
+      }
+      emit_ds1s2(as, RISCVI_SLT, RID_TMP,
+                 ismax ? left : right, ismax ? right : left);
+    }
+  }
+}
+
+#define asm_min(as, ir)	asm_min_max(as, ir, 0)
+#define asm_max(as, ir)	asm_min_max(as, ir, 1)
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* FP comparisons: set RID_TMP with FLT.D/FLE.D/FEQ.D, then guard on RID_TMP against zero. */
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+  IROp op = ir->o;
+  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+  right = (left >> 8); left &= 255;  /* Unpack ra_alloc2() result: low byte = left, next byte = right. */
+  asm_guard(as, (op < IR_EQ ? (op&4) : (op&1))
+            ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO);
+  switch (op) {
+    case IR_LT: case IR_UGE:  /* Each pair shares one compare; the guard above picks BEQ or BNE. */
+      emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right);
+      break;
+    case IR_LE: case IR_UGT: case IR_ABC:
+      emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right);
+      break;
+    case IR_GT: case IR_ULE:
+      emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left);  /* Operands swapped. */
+      break;
+    case IR_GE: case IR_ULT:
+      emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left);  /* Operands swapped. */
+      break;
+    case IR_EQ: case IR_NE:
+      emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right);
+      break;
+    default:
+      break;
+  }
+}
+
+/* Integer comparisons. */
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. 
*/ + /* 00 01 10 11 100 101 110 111 */ + IROp op = ir->o; + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + switch (op) { + case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; + case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; + case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; + case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; + default: break; + } + return; + } + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if ((op&2)) k++; + if (checki12(k)) { + asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), + (op&2) ? right : left, (op&2) ? left : right); +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +static void asm_equal(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpcomp(as, ir); + } else { + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); + } +} + +/* -- Split register ops -------------------------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if (!usehi) return; /* Skip unused hiword op for all remaining ops. 
*/
+  switch ((ir-1)->o) {
+  case IR_CALLN:
+  case IR_CALLL:
+  case IR_CALLS:
+  case IR_CALLXS:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
+    break;
+  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+  }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)  /* Guard on the HOOK_PROFILE bit of the global hookmask. */
+{
+  UNUSED(ir);
+  asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
+  emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
+  emit_lsglptr(as, RISCVI_LBU, RID_TMP,
+               (int32_t)offsetof(global_State, hookmask));
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+                            IRIns *irp, RegSet allow, ExitNo exitno)
+{
+  /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */
+  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
+  ExitNo oldsnap = as->snapno;
+  rset_clear(allow, pbase);
+  as->snapno = exitno;  /* The guard below is emitted with snapno == exitno ... */
+  asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO);
+  as->snapno = oldsnap;  /* ... then the previous snapshot number is restored. */
+  if (allow) {
+    tmp = rset_pickbot(allow);
+    ra_modified(as, tmp);
+  } else {  /* allow == RSET_EMPTY */
+    tmp = RID_RET;
+    emit_lso(as, RISCVI_LD, tmp, RID_SP, 0);  /* Restore tmp1 register. */
+  }
+  emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));  /* RID_TMP = (maxstack - base) < 8*topslot. */
+  emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase);
+  emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack));
+  if (pbase == RID_TMP)
+    emit_getgl(as, RID_TMP, jit_base);  /* irp has no register: load jit_base into RID_TMP. */
+  emit_getgl(as, tmp, cur_L);
+  if (allow == RSET_EMPTY)  /* Spill temp register. */
+    emit_lso(as, RISCVI_SD, tmp, RID_SP, 0);
+}
+
+/* Restore Lua stack from on-trace state. 
*/
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+  SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
+  MSize n, nent = snap->nent;
+  /* Store the value of all modified slots to the Lua stack. */
+  for (n = 0; n < nent; n++) {
+    SnapEntry sn = map[n];
+    BCReg s = snap_slot(sn);
+    int32_t ofs = 8*((int32_t)s-1-LJ_FR2);  /* Byte offset of the slot relative to RID_BASE. */
+    IRRef ref = snap_ref(sn);
+    IRIns *ir = IR(ref);
+    if ((sn & SNAP_NORESTORE))
+      continue;
+    if (irt_isnum(ir->t)) {
+      Reg src = ra_alloc1(as, ref, RSET_FPR);
+      emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs);
+    } else {
+      if ((sn & SNAP_KEYINDEX)) {
+        RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+        int64_t kki = (int64_t)LJ_KEYINDEX << 32;  /* LJ_KEYINDEX goes in the upper 32 bits. */
+        if (irref_isk(ref)) {
+          emit_lso(as, RISCVI_SD,
+            ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
+            RID_BASE, ofs);
+        } else {
+          Reg src = ra_alloc1(as, ref, allow);
+          Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
+          emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs);
+          emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki);  /* RID_TMP = src + kki. */
+        }
+      } else {
+        asm_tvstore64(as, RID_BASE, ofs, ref);
+      }
+    }
+    checkmclim(as);
+  }
+  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Marker to prevent patching the GC check exit: the word "OR RID_TMP, RID_TMP, RID_TMP". */
+#define RISCV_NOPATCH_GC_CHECK \
+  (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP))
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+  IRRef args[2];
+  MCLabel l_end;
+  Reg tmp;
+  ra_evictset(as, RSET_SCRATCH);
+  l_end = emit_label(as);
+  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+  asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO);  /* Assumes asm_snap_prep() already done. 
*/ + *--as->mcp = RISCV_NOPATCH_GC_CHECK; + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + tmp = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1); + emit_getgl(as, tmp, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + ptrdiff_t delta; + if (as->loopinv) { /* Inverted loop branch? */ + delta = (char *)target - (char *)(p - 2); + /* asm_guard* already inverted the branch, and patched the final b. */ + lj_assertA(checki21(delta), "branch target out of range"); + p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); + } else { + /* J */ + delta = (char *)target - (char *)(p - 1); + p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); + } +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do(?) */ +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_mv(as, r, RID_BASE); + } +} + +/* Coalesce BASE register for a side trace. 
*/
+static Reg asm_head_side_base(ASMState *as, IRIns *irp)
+{
+  IRIns *ir = IR(REF_BASE);
+  Reg r = ir->r;
+  if (ra_hasreg(r)) {
+    ra_free(as, r);
+    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
+      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
+    if (irp->r == r) {
+      return r;  /* Same BASE register already coalesced. */
+    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+      emit_mv(as, r, irp->r);  /* Move from coalesced parent reg. */
+      return irp->r;
+    } else {
+      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
+    }
+  }
+  return RID_NONE;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code: patch the last three words before as->mctop (SP adjust, AUIPC, JALR). */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+  MCode *p = as->mctop;
+  MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;  /* lnk == 0: exit to the interpreter. */
+  int32_t spadj = as->T->spadjust;
+  if (spadj == 0) {
+    p[-3] = RISCVI_NOP;
+    /* Disabled: as->mctop = p-2; */
+  } else {
+    /* Patch stack adjustment. */
+    p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj);
+  }
+  /* Patch exit jump. */
+  ptrdiff_t delta = (char *)target - (char *)(p - 2);  /* Relative to the AUIPC at p[-2]. */
+  p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+  p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+  MCode *p = as->mctop - 2;  /* Leave room for exitstub. */
+  if (as->loopref) {
+    as->invmcp = as->mcp = p;
+  } else {
+    as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
+    as->invmcp = NULL;
+  }
+  p[0] = p[1] = RISCVI_NOP;  /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. 
*/
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  uint32_t i, nargs = CCI_XNARGS(ci);
+  int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+  asm_collectargs(as, ir, ci, args);
+  for (i = 0; i < nargs; i++) {
+    if (args[i] && irt_isfp(IR(args[i])->t)) {
+      if (nfpr > 0) {
+        nfpr--; if(ci->flags & CCI_VARARG) ngpr--;  /* Vararg calls consume a GPR alongside each FPR. */
+      } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--;  /* FPRs exhausted: non-vararg FP args fall back to a GPR. */
+      else nslots += 2;
+    } else {
+      if (ngpr > 0) {
+        ngpr--; if(ci->flags & CCI_VARARG) nfpr--;  /* Vararg calls consume an FPR alongside each GPR. */
+      } else nslots += 2;
+    }
+  }
+  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
+    as->evenspill = nslots;
+  return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)  /* Set up spare jumps and the exit stubs (one extra stub when as->parent is set). */
+{
+  asm_sparejump_setup(as);
+  asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+  MCode *p = T->mcode;
+  MCode *pe = (MCode *)((char *)p + T->szmcode);
+  MCode *px = exitstub_trace_addr(T, exitno);
+  MCode *cstart = NULL;  /* First patched word; NULL while nothing was patched. */
+  MCode *mcarea = lj_mcode_patch(J, p, 0);
+
+  for (; p < pe; p++) {
+    /* Look for exitstub branch, replace with branch to target. */
+    ptrdiff_t odelta = (char *)px - (char *)(p+1),
+              ndelta = (char *)target - (char *)(p+1);
+    if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 &&  /* p[0]: B-type immediate bits equal RISCVF_IMMB(8) ... */
+         ((p[0] & 0x0000007fu) == 0x63u) &&  /* ... and opcode 0x63 (BRANCH). */
+         ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&  /* p[1]: J-type immediate points at the exit stub ... */
+         ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) ||  /* ... and opcode 0x6f (JAL). */
+        (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 &&
+         ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) {
+      lj_assertJ(checki32(ndelta), "branch target out of range");
+      /* Patch jump, if within range. */
+    patchbranch:
+      if (checki21(ndelta)) { /* Patch jump */
+        p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta);
+        if (!cstart) cstart = p + 1;
+      } else {  /* Branch out of range. Use spare jump slot in mcarea. */
+        MCode *mcjump = asm_sparejump_use(mcarea, target);
+        if (mcjump) {
+          lj_mcode_sync(mcjump, mcjump+2);
+          ndelta = (char *)mcjump - (char *)(p+1);  /* Retarget the JAL at the spare jump slot. */
+          if (checki21(ndelta)) {
+            goto patchbranch;
+          } else {
+            lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
+          }
+        }
+        /* Ignore jump slot overflow. Child trace is simply not attached. */
+      }
+    } else if (p+2 == pe) {  /* Last two words of the trace. */
+      if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) {
+        ptrdiff_t delta = (char *)target - (char *)p;
+        lj_assertJ(checki32(delta), "jump target out of range");
+        p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+        p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+        if (!cstart) cstart = p;
+      }
+    }
+  }
+  if (cstart) lj_mcode_sync(cstart, px+1);
+  lj_mcode_patch(J, mcarea, 1);
+}
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index c613db2cc..e4bed4f84 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -687,6 +687,97 @@
     if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \
   }
 
+#elif LJ_TARGET_RISCV64
+/* -- RISC-V lp64d calling conventions ------------------------------------ */
+
+#define CCALL_HANDLE_STRUCTRET \
+  /* Return structs of size > 16 by reference. */ \
+  cc->retref = !(sz <= 16); \
+  if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+  unsigned int cl = ccall_classify_struct(cts, ctr); \
+  if ((cl & 4) && (cl >> 8) <= 2) {  /* One or two 4-byte FP members: copy each from its FPR. */ \
+    CTSize i = (cl >> 8) - 1; \
+    do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \
+  } else { \
+    if (cl > 1) { \
+      sp = (uint8_t *)&cc->fpr[0]; \
+      if ((cl >> 8) > 2) \
+        sp = (uint8_t *)&cc->gpr[0]; \
+    } \
+    memcpy(dp, sp, ctr->size); \
+  } \
+
+#define CCALL_HANDLE_COMPLEXRET \
+  /* Complex values are returned in 1 or 2 FPRs. 
*/ \
+  cc->retref = 0;
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+  if (ctr->size == 2*sizeof(float)) {  /* Copy complex float from FPRs. */ \
+    ((float *)dp)[0] = cc->fpr[0].f; \
+    ((float *)dp)[1] = cc->fpr[1].f; \
+  } else {  /* Copy complex double from FPRs. */ \
+    ((double *)dp)[0] = cc->fpr[0].d; \
+    ((double *)dp)[1] = cc->fpr[1].d; \
+  }
+
+#define CCALL_HANDLE_COMPLEXARG \
+  /* Pass long double complex by reference. */ \
+  if (sz == 2*sizeof(long double)) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass complex in two FPRs or on stack. */ \
+  else if (sz == 2*sizeof(float)) { \
+    isfp = 2;  /* isfp == 2 marks float-sized parts. */ \
+    sz = 2*CTSIZE_PTR; \
+  } else { \
+    isfp = 1; \
+    sz = 2*CTSIZE_PTR; \
+  }
+
+#define CCALL_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+    sp = (uint8_t *)&cc->fpr[0].f;
+
+#define CCALL_HANDLE_STRUCTARG \
+  /* Pass structs of size >16 by reference. */ \
+  unsigned int cl = ccall_classify_struct(cts, d); \
+  nff = cl >> 8;  /* Upper bits of the classification: FP member count. */ \
+  if (sz > 16) { \
+    rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+    sz = CTSIZE_PTR; \
+  } \
+  /* Pass struct in FPRs. */ \
+  if (cl > 1) { \
+    isfp = (cl & 4) ? 2 : 1;  /* 2 when the member size is 4, 1 otherwise. */ \
+  }
+
+
+#define CCALL_HANDLE_REGARG \
+  if (isfp && (!isva)) {  /* Try to pass argument in FPRs. */ \
+    int n2 = ctype_isvector(d->info) ? 1 : \
+             isfp == 1 ? n : 2; \
+    if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \
+      dp = &cc->fpr[nfpr]; \
+      nfpr += n2; \
+      goto done; \
+    } else {  /* No room in the FPRs: try the GPRs instead. */ \
+      if (ngpr + n2 <= maxgpr) { \
+        dp = &cc->gpr[ngpr]; \
+        ngpr += n2; \
+        goto done; \
+      } \
+    } \
+  } else {  /* Try to pass argument in GPRs. 
*/ \
+    if (ngpr + n <= maxgpr) { \
+      dp = &cc->gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -1047,6 +1138,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
 
 #endif
 
+/* -- RISC-V ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_RISCV64
+
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)  /* Returns size (4 or 8) + (FP member count << 8) for an all-FP aggregate, else (sz <= 16). */
+{
+  CTSize sz = ct->size;
+  unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);  /* r: OR of FP member sizes, n: FP member count. */
+  while (ct->sib) {
+    CType *sct;
+    ct = ctype_get(cts, ct->sib);
+    if (ctype_isfield(ct->info)) {
+      sct = ctype_rawchild(cts, ct);
+      if (ctype_isfp(sct->info)) {
+        r |= sct->size;
+        if (!isu) n++; else if (n == 0) n = 1;  /* Union: keep the maximum instead of summing. */
+      } else if (ctype_iscomplex(sct->info)) {
+        r |= (sct->size >> 1);  /* A complex member counts as two members of half its size. */
+        if (!isu) n += 2; else if (n < 2) n = 2;
+      } else if (ctype_isstruct(sct->info)) {
+        goto substruct;
+      } else {
+        goto noth;
+      }
+    } else if (ctype_isbitfield(ct->info)) {
+      goto noth;
+    } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+      sct = ctype_rawchild(cts, ct);
+    substruct:
+      if (sct->size > 0) {
+        unsigned int s = ccall_classify_struct(cts, sct);  /* Classify the nested aggregate recursively. */
+        if (s <= 1) goto noth;
+        r |= (s & 255);
+        if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+      }
+    }
+  }
+  if ((r == 4 || r == 8) && n <= 4)  /* All FP members share one size (4 or 8). */
+    return r + (n << 8);
+noth:  /* Not a homogeneous float/double aggregate. */
+  return (sz <= 16);  /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
 /* -- Common C call handling ---------------------------------------------- */
 
 /* Infer the destination CTypeID for a vararg argument. */
@@ -1093,6 +1229,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
 #endif
 #endif
 
+#if LJ_TARGET_RISCV64
+  int nff = 0;
+#endif
+
   /* Clear unused regs to get some determinism in case of misdeclaration. 
*/ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR @@ -1282,7 +1422,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int64_t *)dp = (int64_t)*(int32_t *)dp; } #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_RISCV64 + if (isfp && d->size == sizeof(float)) + ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1322,6 +1466,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } +#elif LJ_TARGET_RISCV64 + if (isfp == 2 && nff <= 2) { + /* Split complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { + ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; + } while (i--); + } #else UNUSED(isfp); #endif @@ -1331,7 +1483,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if ((int32_t)nsp < 0) nsp = 0; #endif -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. 
*/ #endif cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 38d35dc52..609effa0b 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -157,6 +157,21 @@ typedef union FPRArg { float f; } FPRArg; +#elif LJ_TARGET_RISCV64 + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -204,7 +219,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC +#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index ae0934587..ef9c13ffc 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 52 +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_MCODE_HEAD 68 + #else /* Missing support for this architecture. 
*/ @@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) } return p; } +#elif LJ_TARGET_RISCV64 +static void *callback_mcode_init(global_State *g, uint32_t *page) +{ + uint32_t *p = page; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; + uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; + uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; + MSize slot; + *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); + *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); + *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = 
RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot);
+    *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p));  /* Offset from this word back to the start of the page. */
+    p++;
+  }
+  return p;
+}
 #else
 /* Missing support for this architecture. */
 #define callback_mcode_init(g, p)	(p)
@@ -580,6 +617,31 @@ void lj_ccallback_mcode_free(CTState *cts)
     if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \
   }
 
+#elif LJ_TARGET_RISCV64
+
+#define CALLBACK_HANDLE_REGARG \
+  if (isfp) {  /* FP argument: FPR slots first, GPR slots as fallback. */ \
+    if (nfpr + n <= CCALL_NARG_FPR) { \
+      sp = &cts->cb.fpr[nfpr]; \
+      nfpr += n; \
+      goto done; \
+    } else if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  } else {  /* Non-FP argument: GPR slots only. */ \
+    if (ngpr + n <= maxgpr) { \
+      sp = &cts->cb.gpr[ngpr]; \
+      ngpr += n; \
+      goto done; \
+    } \
+  }
+
+#define CALLBACK_HANDLE_RET \
+  if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))  /* float result: copy it into the adjacent 32-bit word as well. */ \
+    ((float *)dp)[1] = *(float *)dp;
+
 #else
 #error "Missing calling convention definitions for this architecture"
 #endif
@@ -735,7 +797,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
       *(int64_t *)dp = (int64_t)*(int32_t *)dp;
     }
 #endif
-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64
     /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
     if (ctr->size <= 4 &&
 	(LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h
new file mode 100644
index 000000000..d4160663e
--- /dev/null
+++ b/src/lj_emit_riscv.h
@@ -0,0 +1,574 @@
+/*
+** RISC-V instruction emitter.
+** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by gns from PLCT Lab, ISCAS. 
+*/ + +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } +} + +#define get_kval(as, ref) get_k64val(as, ref) + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); +} + +#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) +#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) +#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) + +static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); +} + +#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) + +static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); +} + +#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) +#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) +#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) + +static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); +} + +#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) + +/* +static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = 
riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); +} +*/ + +static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); +} + +#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) + +/* +static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); +} +*/ + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); +static Reg ra_scratch(ASMState *as, RegSet allow); + +static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) +{ + lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); + switch (riscvi) { + case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: + case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: + case RISCVI_FLW: case RISCVI_FLD: + emit_dsi(as, riscvi, data, base, ofs); + break; + case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: + case RISCVI_FSW: case RISCVI_FSD: + emit_s1s2i(as, riscvi, base, data, ofs); + break; + default: lj_assertA(0, "invalid lso"); break; + } +} + +static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, + int32_t shamt) +{ + if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { + if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { + case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; + case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; + default: lj_assertA(0, "invalid roti op"); break; + } + emit_dsshamt(as, riscvi, rd, rs1, shamt); + } else { + RISCVIns ai, bi; + int32_t shwid, shmsk; + switch (riscvi) { + case RISCVI_RORI: + ai = RISCVI_SRLI, bi = RISCVI_SLLI; + shwid = 64, shmsk = 63; + break; + case RISCVI_RORIW: + ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; + shwid = 32, shmsk = 31; + break; + default: + lj_assertA(0, "invalid roti op"); + return; + } + emit_ds1s2(as, 
RISCVI_OR, rd, rd, tmp); + emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); + emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); + } +} + +static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, riscvi, rd, rs1, rs2); + } else { + RISCVIns sai, sbi; + switch (riscvi) { + case RISCVI_ROL: + sai = RISCVI_SLL, sbi = RISCVI_SRL; + break; + case RISCVI_ROR: + sai = RISCVI_SRL, sbi = RISCVI_SLL; + break; + case RISCVI_ROLW: + sai = RISCVI_SLLW, sbi = RISCVI_SRLW; + break; + case RISCVI_RORW: + sai = RISCVI_SRLW, sbi = RISCVI_SLLW; + break; + default: + lj_assertA(0, "invalid rot op"); + return; + } + if (rd == rs2) { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } + } +} + +static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) +{ + if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || + (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { + emit_ds(as, riscvi, rd, rs1); + } else if (as->flags & JIT_F_RVXThead) { + uint32_t hi, sext; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + hi = 15, sext = 0; + break; + case RISCVI_ZEXT_W: + hi = 31, sext = 0; + break; + case RISCVI_SEXT_B: + hi = 7, sext = 1; + break; + case RISCVI_SEXT_H: + hi = 15, sext = 1; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsi(as, sext ? 
RISCVI_TH_EXT : RISCVI_TH_EXTU, + rd, rs1, hi << 6); + } else { + RISCVIns sli, sri; + int32_t shamt; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 48; + break; + case RISCVI_ZEXT_W: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 32; + break; + case RISCVI_SEXT_B: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 56; + break; + case RISCVI_SEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 48; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsshamt(as, sri, rd, rd, shamt); + emit_dsshamt(as, sli, rd, rs1, shamt); + } +} + +static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) +{ + if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); + } else { + emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); + emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); + } +} + +/* +static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +/* +static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); + } else { + emit_ds(as, RISCVI_NOT, rd, rd); + emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); + } +} + +static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) +{ + if (as->flags & JIT_F_RVZba) { + switch (shamt) { + case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break; + case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; + case 
3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break;
+      default: return;
+    }
+  } else if (as->flags & JIT_F_RVXThead) {
+    emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2);
+  } else {
+    emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp);
+    emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt);
+  }
+}
+
+#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1)
+#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2)
+#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3)
+
+/* Load a constant with a single ADDI whose source register field is 0
+** (RID_ZERO). Only the low 12 bits of i are encoded, so the caller must
+** make sure i fits a signed 12 bit immediate.
+*/
+static void emit_loadk12(ASMState *as, Reg rd, int32_t i)
+{
+  emit_di(as, RISCVI_ADDI, rd, i);
+}
+
+/* Load a 32 bit constant into rd.
+** Values that pass checki12() take one ADDI. Everything else is split with
+** RISCVF_HI/RISCVF_LO into LUI + ADDI. Machine code is written backwards
+** (every emitter pre-decrements as->mcp), so the LUI emitted last here is
+** the instruction that executes first.
+** A positive value whose rounded upper part is 0x80000 uses XORI instead of
+** ADDI. NOTE(review): presumably this cancels the sign extension of
+** LUI 0x80000 on RV64 -- confirm against the ISA manual.
+*/
+static void emit_loadk32(ASMState *as, Reg rd, int32_t i)
+{
+  if (checki12((int64_t)i)) {
+    emit_loadk12(as, rd, i);
+  } else {
+    if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0))
+      emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i));
+    else
+      emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i));
+    emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i));
+  }
+}
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer rematerialization of BASE/L from global_State over spills. */
+#define emit_canremat(ref) ((ref) <= REF_BASE)
+
+
+/* Load a 32 bit constant into a GPR.
+** Expands to a single call expression without a trailing semicolon, so the
+** macro behaves like a function call inside unbraced if/else statements.
+*/
+#define emit_loadi(as, r, i) emit_loadk32(as, r, i)
+
+/* Load a 64 bit constant into a GPR.
Code is written back-to-front (each store pre-decrements as->mcp), so the emit calls below appear in reverse execution order. */
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+{
+  int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2));  /* Offset of u64 from the slot two below mcp, which is where the AUIPC of the pair below gets stored. */
+  if (checki32((int64_t)u64)) {  /* Case 1: hand over to the 32 bit loader. */
+    emit_loadk32(as, r, (int32_t)u64);
+  } else if (checki32auipc(u64_delta)) {  /* Case 2: AUIPC + ADDI relative to the code address. */
+    emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta));
+    emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta));
+  } else {  /* Case 3: load the upper 32 bits, then shift in the lower 32 bits. */
+    uint32_t lo32 = u64 & 0xfffffffful;
+    if (checku11(lo32)) {  /* lo32 < 2^11: one shift by 32 plus an ADDI if lo32 is non-zero. */
+      if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32);
+      emit_dsshamt(as, RISCVI_SLLI, r, r, 32);
+    } else {
+      RISCVIns li_insn[7] = {0};  /* Candidate SLLI/ADDI sequence; entry 0 is stored first and therefore executes last. */
+      int shamt = 0, step = 0;  /* shamt: shift amount accumulated so far; step: entries used in li_insn. */
+      for(int bit = 0; bit < 32; bit++) {  /* Scan lo32 from bit 0 upwards. */
+        if (lo32 & (1u << bit)) {  /* Set bit: take a chunk of up to 11 bits starting here. */
+          if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
+          int inc = bit+10 > 31 ? 31-bit : 10;  /* The chunk covers bit..bit+inc, clipped at bit 31. */
+          bit += inc, shamt = inc+1;
+          uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1);  /* Mask selecting the chunk inside lo32. */
+          uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0);  /* Chunk value moved down to bit 0. */
+          li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload);
+        } else shamt++;  /* Clear bit: only widen the pending shift. */
+      }
+      if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt);
+
+      if (step < 6) {  /* Shorter than the fixed six-instruction split below: use the candidate. */
+        for(int i = 0; i < step; i++)
+          *--as->mcp = li_insn[i];
+      } else {  /* Fixed split of lo32 into 11 + 11 + 10 bits, in execution order. */
+        emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff);
+        emit_dsshamt(as, RISCVI_SLLI, r, r, 10);
+        emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff);
+        emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
+        emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff);
+        emit_dsshamt(as, RISCVI_SLLI, r, r, 11);
+      }
+    }
+
+    uint32_t hi32 = u64 >> 32;  /* Emitted last, so the upper half is loaded before the shifts above run. */
+    if (hi32 & 0xfff) emit_loadk32(as, r, hi32);
+    else emit_du(as, RISCVI_LUI, r, hi32 >> 12);
+  }
+}
+
+#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
+
+/* Get/set from constant pointer.
The access uses offset 0 from the register returned by ra_allock() for the pointer value. */
+static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow)
+{
+  emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0);
+}
+
+/* Load 64 bit IR constant into register.
+** A GPR target is loaded directly with emit_loadu64(). An FPR target is
+** loaded through RID_TMP and moved over with FMV.D.X, unless JIT_F_RVZfa is
+** set and the constant matches riscv_fli_map_hi16[], in which case FLI.D
+** (plus FNEG.D for a set sign bit) is emitted instead.
+*/
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+  const uint64_t *k = &ir_k64(ir)->u64;
+  Reg r64 = r;  /* Register that receives the raw 64 bit pattern. */
+  if (rset_test(RSET_FPR, r)) {
+    if (as->flags & JIT_F_RVZfa) {
+      uint8_t sign = (*k >> 63) & 1;
+      uint16_t k_hi16 = (*k >> 48) & 0xffff;
+      uint64_t k_lo48 = *k & 0xffffffffffff;
+      uint16_t mk_hi16 = k_hi16 & 0x7fff;  /* Upper 16 bits with the sign bit cleared. */
+      if (!k_lo48) {  /* The table only holds upper 16 bit patterns, so the rest must be zero. */
+        if (riscv_fli_map_hi16[0] == k_hi16) {  /* Entry 0 is compared with its sign bit included. */
+          emit_ds(as, RISCVI_FLI_D, r, 0);
+          return;
+        }
+        for (int i = 1; i < 32; i++) {
+          if (riscv_fli_map_hi16[i] == mk_hi16) {
+            if (sign)
+              emit_ds1s2(as, RISCVI_FNEG_D, r, r, r);  /* Emitted first, so it runs after the FLI.D below. */
+            emit_ds(as, RISCVI_FLI_D, r, i);
+            return;
+          }
+        }
+      }
+    }
+    r64 = RID_TMP;
+    emit_ds(as, RISCVI_FMV_D_X, r, r64);  /* Runs after the load below has filled RID_TMP. */
+  }
+  emit_loadu64(as, r64, *k);
+}
+
+/* Get/set global_State fields. The field is addressed as RID_GL + ofs. */
+static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs)
+{
+  emit_lso(as, riscvi, r, RID_GL, ofs);
+}
+
+#define emit_getgl(as, r, field) \
+  emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field))
+#define emit_setgl(as, r, field) \
+  emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field))
+
+/* Trace number is determined from per-trace exit stubs. */
+#define emit_setvmstate(as, i) UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC.
*/
+#define emit_label(as) ((as)->mcp)
+
+/* Emit a conditional branch on rs1/rs2 to target.
+**   jump == -1: one B-type branch; the offset must fit 13 signed bits.
+**   jump ==  0: NOP + branch when the offset fits 13 signed bits, otherwise
+**               the inverted branch skipping over a JAL to the target.
+**   jump ==  1: always the inverted branch + JAL pair.
+** The offset is measured from the slot just below as->mcp, which holds the
+** instruction that actually transfers control to target in every form.
+*/
+static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)(p - 1);
+  switch (jump) {
+    case -1:
+      lj_assertA(checki13(delta), "branch target out of range");  /* B */
+      *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
+      break;
+    case 0: case 1:
+      lj_assertA(checki21(delta), "branch target out of range");  /* ^B+J */
+      if (checki13(delta) && !jump) {
+        *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta);
+        *--p = RISCVI_NOP;
+      } else {
+        *--p = RISCVI_JAL | RISCVF_IMMJ(delta);  /* Poorman's trampoline */
+        /* Flipping bit 12 selects the opposite condition; +8 skips the JAL. */
+        *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8);
+      }
+      break;
+    default:
+      lj_assertA(0, "invalid jump type");
+      break;
+  }
+  as->mcp = p;
+}
+
+/* Emit an unconditional jump to target.
+**   jump == -1: one JAL; the offset must fit 21 signed bits.
+**   jump ==  0: JAL + NOP when the offset fits 21 signed bits, otherwise
+**               AUIPC + JALR through RID_TMP.
+**   jump ==  1: always AUIPC + JALR through RID_TMP.
+** The two-instruction forms measure the offset from the lower of their two
+** slots, which holds the JAL or the AUIPC.
+*/
+static void emit_jump(ASMState *as, MCode *target, int jump)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta;
+  switch(jump) {
+    case -1:
+      delta = (char *)target - (char *)(p - 1);
+      lj_assertA(checki21(delta), "jump target out of range");  /* J */
+      *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
+      break;
+    case 0: case 1:
+      delta = (char *)target - (char *)(p - 2);
+      lj_assertA(checki32auipc(delta), "jump target out of range");  /* AUIPC+JALR */
+      if (checki21(delta) && !jump) {
+        *--p = RISCVI_NOP;
+        *--p = RISCVI_JAL | RISCVF_IMMJ(delta);
+      } else {
+        *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+        *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+      }
+      break;
+    default:
+      lj_assertA(0, "invalid jump type");
+      break;
+  }
+  as->mcp = p;
+}
+
+#define emit_jmp(as, target) emit_jump(as, target, 0)
+
+#define emit_mv(as, dst, src) \
+  emit_ds(as, RISCVI_MV, (dst), (src))
+
+/* Emit a call to target that links through RID_RA.
+** Offsets fitting 21 signed bits use JAL + NOP, offsets accepted by
+** checki32auipc() use AUIPC + JALR through RID_TMP, and anything else uses
+** a JALR through RID_CFUNCADDR. Whenever needcfa ends up above 1, the
+** absolute target is requested in RID_CFUNCADDR via ra_allockreg().
+*/
+static void emit_call(ASMState *as, void *target, int needcfa)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (char *)target - (char *)(p - 2);
+  if (checki21(delta)) {
+    *--p = RISCVI_NOP;
+    *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta);
+  } else if (checki32auipc(delta)) {
+    /* checki32auipc() also rejects offsets whose RISCVF_HI() would round
+    ** up to 0x80000; those take the register path below.
+    */
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta));
+    *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta));
+    needcfa = 1;
+  } else {
+    *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0);
+    needcfa = 2;
+  }
+  as->mcp = p;
+  if (needcfa > 1)
+    ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs.
+** GPR to GPR uses MV. Moves that cross register files or stay within the
+** FPRs pick the double or single variant from irt_isnum(ir->t).
+*/
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+  if (src < RID_MAX_GPR && dst < RID_MAX_GPR)
+    emit_mv(as, dst, src);
+  else if (src < RID_MAX_GPR)
+    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src);
+  else if (dst < RID_MAX_GPR)
+    emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src);
+  else
+    emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
+}
+
+/* Emit an arithmetic operation with a constant operand.
+** riscvi is the immediate form (ADDI/XORI/ORI/ANDI). If k does not fit a
+** signed 12 bit immediate, k is loaded into tmp and the matching
+** register-register instruction is emitted instead.
+*/
+static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
+                     Reg tmp, intptr_t k)
+{
+  if (checki12(k)) emit_dsi(as, riscvi, dest, src, k);
+  else {
+    switch (riscvi) {
+      case RISCVI_ADDI: riscvi = RISCVI_ADD; break;
+      case RISCVI_XORI: riscvi = RISCVI_XOR; break;
+      case RISCVI_ORI: riscvi = RISCVI_OR; break;
+      case RISCVI_ANDI: riscvi = RISCVI_AND; break;
+      default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
+    }
+    emit_ds1s2(as, riscvi, dest, src, tmp);
+    emit_loadu64(as, tmp, (uintptr_t)k);  /* Emitted last, so tmp is loaded before the op runs. */
+  }
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)
+    emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs);
+  else
+    emit_lso(as, irt_isnum(ir->t) ?
RISCVI_FLD : RISCVI_FLW, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r < RID_MAX_GPR) + emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs); + else + emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); +} + + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) diff --git a/src/lj_frame.h b/src/lj_frame.h index 2fb1b2f3e..440e83c36 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ ** need to change to 3. */ #define CFRAME_SHIFT_MULTRES 0 +#elif LJ_TARGET_RISCV64 +#define CFRAME_OFS_ERRF 252 +#define CFRAME_OFS_NRES 248 +#define CFRAME_OFS_PREV 240 +#define CFRAME_OFS_L 232 +#define CFRAME_OFS_PC 224 +#define CFRAME_OFS_MULTRES 0 +#define CFRAME_SIZE 256 +#define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 56094cf10..f1a208bd4 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -306,6 +306,9 @@ enum { #elif LJ_TARGET_MIPS DW_REG_SP = 29, DW_REG_RA = 31, +#elif LJ_TARGET_RISCV64 + DW_REG_SP = 2, + DW_REG_RA = 1, #else #error "Unsupported target architecture" #endif @@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { .machine = 20, #elif LJ_TARGET_MIPS .machine = 8, +#elif LJ_TARGET_RISCV64 + .machine = 243, #else #error "Unsupported target architecture" #endif @@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } } +#elif LJ_TARGET_RISCV64 + { + int i; + for (i = 27; i >= 
18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } + DB(DW_CFA_offset|9); DUV(17); + DB(DW_CFA_offset|8); DUV(18); + for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } + DB(DW_CFA_offset|32|9); DUV(29); + DB(DW_CFA_offset|32|8); DUV(30); + } #else #error "Unsupported target architecture" #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index a11ef7292..bf1a625c3 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -68,6 +68,46 @@ #endif #endif +#elif LJ_TARGET_RISCV64 + +#define JIT_F_RVC (JIT_F_CPU << 0) +#define JIT_F_RVZba (JIT_F_CPU << 1) +#define JIT_F_RVZbb (JIT_F_CPU << 2) +#define JIT_F_RVZicond (JIT_F_CPU << 3) +#define JIT_F_RVZfa (JIT_F_CPU << 4) +#define JIT_F_RVXThead (JIT_F_CPU << 5) + +#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" + +#if LJ_TARGET_LINUX +#include + +#ifndef __NR_riscv_hwprobe +#ifndef __NR_arch_specific_syscall +#define __NR_arch_specific_syscall 244 +#endif +#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) +#endif + +struct riscv_hwprobe { + int64_t key; + uint64_t value; +}; + +#define RISCV_HWPROBE_KEY_MVENDORID 0 +#define RISCV_HWPROBE_KEY_MARCHID 1 +#define RISCV_HWPROBE_KEY_MIMPID 2 +#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 +#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 + +#define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) +#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) +#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) + +#endif + #else #define JIT_F_CPUSTRING "" diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 0f87caf75..44ef1d48f 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -38,6 +38,12 @@ void sys_icache_invalidate(void *start, size_t len); #endif +#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX +#include +#include +#include +#endif + /* Synchronize data/instruction cache. 
*/ void lj_mcode_sync(void *start, void *end) { @@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *end) sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); +#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX +#if (defined(__GNUC__) || defined(__clang__)) + __asm__ volatile("fence rw, rw"); +#else + lj_vm_fence_rw_rw(); +#endif +#ifdef __GLIBC__ + __riscv_flush_icache(start, end, 0); +#else + syscall(__NR_riscv_flush_icache, start, end, 0UL); +#endif #elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else diff --git a/src/lj_target.h b/src/lj_target.h index d00554d4b..a79f5d6a0 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 typedef uint64_t RegSet; #define RSET_BITS 6 #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) @@ -145,6 +145,8 @@ typedef uint32_t RegCost; #include "lj_target_mips.h" #elif LJ_TARGET_S390X #include "lj_target_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "lj_target_riscv.h" #else #error "Missing include for target CPU" #endif diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h new file mode 100644 index 000000000..22948dc5a --- /dev/null +++ b/src/lj_target_riscv.h @@ -0,0 +1,542 @@ +/* +** Definitions for RISC-V CPUs. +** Copyright (C) 2005-2025 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_RISCV_H +#define _LJ_TARGET_RISCV_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \ + _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ + _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ + _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ + _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ + _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) +#define VRIDDEF(_) + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_ZERO = RID_X0, + RID_TMP = RID_RA, + RID_GP = RID_X3, + RID_TP = RID_X4, + + /* Calling conventions. */ + RID_RET = RID_X10, + RID_RETLO = RID_X10, + RID_RETHI = RID_X11, + RID_FPRET = RID_F10, + RID_CFUNCADDR = RID_X5, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_X18, /* Interpreter BASE. */ + RID_LPC = RID_X20, /* Interpreter PC. */ + RID_GL = RID_X21, /* Interpreter GL. */ + RID_LREG = RID_X23, /* Interpreter L. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_X0, + RID_MAX_GPR = RID_X31+1, + RID_MIN_FPR = RID_MAX_GPR, + RID_MAX_FPR = RID_F31+1, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ +}; + +#define RID_NUM_KREF RID_NUM_GPR +#define RID_MIN_KREF RID_X0 + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except ZERO, TMP, SP, GP, TP, CFUNCADDR and GL. 
*/ +#define RSET_FIXED \ + (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ + RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL)) +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) + +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL + +#define RSET_SCRATCH_GPR \ + (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\ + RSET_RANGE(RID_X10, RID_X17+1)) + +#define RSET_SCRATCH_FPR \ + (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\ + RSET_RANGE(RID_F28, RID_F31+1)) +#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) + +#define REGARG_FIRSTGPR RID_X10 +#define REGARG_LASTGPR RID_X17 +#define REGARG_NUMGPR 8 + +#define REGARG_FIRSTFPR RID_F10 +#define REGARG_LASTFPR RID_F17 +#define REGARG_NUMFPR 8 + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. +*/ +#if LJ_32 +#define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif +#define SPS_FIRST 4 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) + +/* -- Exit state ---------------------------------------------------------- */ +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) +{ + while (*p == 0x00000013) p++; /* Skip RISCVI_NOP. 
*/ + return p + 4 + exitno; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) + +/* -- Instructions -------------------------------------------------------- */ + +/* Instruction fields. */ +#define RISCVF_D(d) (((d)&31) << 7) +#define RISCVF_S1(r) (((r)&31) << 15) +#define RISCVF_S2(r) (((r)&31) << 20) +#define RISCVF_S3(r) (((r)&31) << 27) +#define RISCVF_FUNCT2(f) (((f)&3) << 25) +#define RISCVF_FUNCT3(f) (((f)&7) << 12) +#define RISCVF_FUNCT7(f) (((f)&127) << 25) +#define RISCVF_SHAMT(s) ((s) << 20) +#define RISCVF_RM(m) (((m)&7) << 12) +#define RISCVF_IMMI(i) ((i) << 20) +#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7) +#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7) +#define RISCVF_IMMU(i) (((i)&0xfffff) << 12) +#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20) + +/* Encode helpers. */ +#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800)) +#define RISCVF_W_LO(w) ((w)&0xfff) +#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff) +#define RISCVF_LO(i) RISCVF_W_LO(i) + +/* Check for valid field range. 
*/ +#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) +#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) +#define checku11(i) RISCVF_UIMM_OK(i, 11) +#define checki12(i) RISCVF_SIMM_OK(i, 12) +#define checki13(i) RISCVF_SIMM_OK(i, 13) +#define checki20(i) RISCVF_SIMM_OK(i, 20) +#define checki21(i) RISCVF_SIMM_OK(i, 21) +#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) + +typedef enum RISCVIns { + + /* --- RVI --- */ + RISCVI_LUI = 0x00000037, + RISCVI_AUIPC = 0x00000017, + + RISCVI_JAL = 0x0000006f, + RISCVI_JALR = 0x00000067, + + RISCVI_ADDI = 0x00000013, + RISCVI_SLTI = 0x00002013, + RISCVI_SLTIU = 0x00003013, + RISCVI_XORI = 0x00004013, + RISCVI_ORI = 0x00006013, + RISCVI_ANDI = 0x00007013, + + RISCVI_SLLI = 0x00001013, + RISCVI_SRLI = 0x00005013, + RISCVI_SRAI = 0x40005013, + + RISCVI_ADD = 0x00000033, + RISCVI_SUB = 0x40000033, + RISCVI_SLL = 0x00001033, + RISCVI_SLT = 0x00002033, + RISCVI_SLTU = 0x00003033, + RISCVI_XOR = 0x00004033, + RISCVI_SRL = 0x00005033, + RISCVI_SRA = 0x40005033, + RISCVI_OR = 0x00006033, + RISCVI_AND = 0x00007033, + + RISCVI_LB = 0x00000003, + RISCVI_LH = 0x00001003, + RISCVI_LW = 0x00002003, + RISCVI_LBU = 0x00004003, + RISCVI_LHU = 0x00005003, + RISCVI_SB = 0x00000023, + RISCVI_SH = 0x00001023, + RISCVI_SW = 0x00002023, + + RISCVI_BEQ = 0x00000063, + RISCVI_BNE = 0x00001063, + RISCVI_BLT = 0x00004063, + RISCVI_BGE = 0x00005063, + RISCVI_BLTU = 0x00006063, + RISCVI_BGEU = 0x00007063, + + RISCVI_ECALL = 0x00000073, + RISCVI_EBREAK = 0x00100073, + + RISCVI_NOP = 0x00000013, + RISCVI_MV = 0x00000013, + RISCVI_NOT = 0xfff04013, + RISCVI_NEG = 0x40000033, + RISCVI_RET = 0x00008067, + RISCVI_ZEXT_B = 0x0ff07013, + +#if LJ_TARGET_RISCV64 + RISCVI_LWU = 0x00007003, + RISCVI_LD = 0x00003003, + RISCVI_SD = 0x00003023, + + RISCVI_ADDIW = 0x0000001b, + + RISCVI_SLLIW = 0x0000101b, + RISCVI_SRLIW = 0x0000501b, + RISCVI_SRAIW = 0x4000501b, + + RISCVI_ADDW = 0x0000003b, + RISCVI_SUBW = 0x4000003b, + RISCVI_SLLW = 
0x0000103b, + RISCVI_SRLW = 0x0000503b, + RISCVI_SRAW = 0x4000503b, + + RISCVI_NEGW = 0x4000003b, + RISCVI_SEXT_W = 0x0000001b, +#endif + + /* --- RVM --- */ + RISCVI_MUL = 0x02000033, + RISCVI_MULH = 0x02001033, + RISCVI_MULHSU = 0x02002033, + RISCVI_MULHU = 0x02003033, + RISCVI_DIV = 0x02004033, + RISCVI_DIVU = 0x02005033, + RISCVI_REM = 0x02006033, + RISCVI_REMU = 0x02007033, +#if LJ_TARGET_RISCV64 + RISCVI_MULW = 0x0200003b, + RISCVI_DIVW = 0x0200403b, + RISCVI_DIVUW = 0x0200503b, + RISCVI_REMW = 0x0200603b, + RISCVI_REMUW = 0x0200703b, +#endif + + /* --- RVF --- */ + RISCVI_FLW = 0x00002007, + RISCVI_FSW = 0x00002027, + + RISCVI_FMADD_S = 0x00000043, + RISCVI_FMSUB_S = 0x00000047, + RISCVI_FNMSUB_S = 0x0000004b, + RISCVI_FNMADD_S = 0x0000004f, + + RISCVI_FADD_S = 0x00000053, + RISCVI_FSUB_S = 0x08000053, + RISCVI_FMUL_S = 0x10000053, + RISCVI_FDIV_S = 0x18000053, + RISCVI_FSQRT_S = 0x58000053, + + RISCVI_FSGNJ_S = 0x20000053, + RISCVI_FSGNJN_S = 0x20001053, + RISCVI_FSGNJX_S = 0x20002053, + + RISCVI_FMIN_S = 0x28000053, + RISCVI_FMAX_S = 0x28001053, + + RISCVI_FCVT_W_S = 0xc0000053, + RISCVI_FCVT_WU_S = 0xc0100053, + + RISCVI_FMV_X_W = 0xe0000053, + + RISCVI_FEQ_S = 0xa0002053, + RISCVI_FLT_S = 0xa0001053, + RISCVI_FLE_S = 0xa0000053, + + RISCVI_FCLASS_S = 0xe0001053, + + RISCVI_FCVT_S_W = 0xd0000053, + RISCVI_FCVT_S_WU = 0xd0100053, + RISCVI_FMV_W_X = 0xf0000053, + + RISCVI_FMV_S = 0x20000053, + RISCVI_FNEG_S = 0x20001053, + RISCVI_FABS_S = 0x20002053, +#if LJ_TARGET_RISCV64 + RISCVI_FCVT_L_S = 0xc0200053, + RISCVI_FCVT_LU_S = 0xc0300053, + RISCVI_FCVT_S_L = 0xd0200053, + RISCVI_FCVT_S_LU = 0xd0300053, +#endif + + /* --- RVD --- */ + RISCVI_FLD = 0x00003007, + RISCVI_FSD = 0x00003027, + + RISCVI_FMADD_D = 0x02000043, + RISCVI_FMSUB_D = 0x02000047, + RISCVI_FNMSUB_D = 0x0200004b, + RISCVI_FNMADD_D = 0x0200004f, + + RISCVI_FADD_D = 0x02000053, + RISCVI_FSUB_D = 0x0a000053, + RISCVI_FMUL_D = 0x12000053, + RISCVI_FDIV_D = 0x1a000053, + RISCVI_FSQRT_D = 
0x5a000053, + + RISCVI_FSGNJ_D = 0x22000053, + RISCVI_FSGNJN_D = 0x22001053, + RISCVI_FSGNJX_D = 0x22002053, + + RISCVI_FMIN_D = 0x2a000053, + RISCVI_FMAX_D = 0x2a001053, + + RISCVI_FCVT_S_D = 0x40100053, + RISCVI_FCVT_D_S = 0x42000053, + + RISCVI_FEQ_D = 0xa2002053, + RISCVI_FLT_D = 0xa2001053, + RISCVI_FLE_D = 0xa2000053, + + RISCVI_FCLASS_D = 0xe2001053, + + RISCVI_FCVT_W_D = 0xc2000053, + RISCVI_FCVT_WU_D = 0xc2100053, + RISCVI_FCVT_D_W = 0xd2000053, + RISCVI_FCVT_D_WU = 0xd2100053, + + RISCVI_FMV_D = 0x22000053, + RISCVI_FNEG_D = 0x22001053, + RISCVI_FABS_D = 0x22002053, +#if LJ_TARGET_RISCV64 + RISCVI_FCVT_L_D = 0xc2200053, + RISCVI_FCVT_LU_D = 0xc2300053, + RISCVI_FMV_X_D = 0xe2000053, + RISCVI_FCVT_D_L = 0xd2200053, + RISCVI_FCVT_D_LU = 0xd2300053, + RISCVI_FMV_D_X = 0xf2000053, +#endif + + /* --- Zifencei --- */ + RISCVI_FENCE = 0x0000000f, + RISCVI_FENCE_I = 0x0000100f, + + /* --- Zicsr --- */ + RISCVI_CSRRW = 0x00001073, + RISCVI_CSRRS = 0x00002073, + RISCVI_CSRRC = 0x00003073, + RISCVI_CSRRWI = 0x00005073, + RISCVI_CSRRSI = 0x00006073, + RISCVI_CSRRCI = 0x00007073, + + /* --- RVB --- */ + /* Zba */ + RISCVI_SH1ADD = 0x20002033, + RISCVI_SH2ADD = 0x20004033, + RISCVI_SH3ADD = 0x20006033, +#if LJ_TARGET_RISCV64 + RISCVI_ADD_UW = 0x0800003b, + + RISCVI_SH1ADD_UW = 0x2000203b, + RISCVI_SH2ADD_UW = 0x2000403b, + RISCVI_SH3ADD_UW = 0x2000603b, + + RISCVI_SLLI_UW = 0x0800101b, + + RISCVI_ZEXT_W = 0x0800003b, +#endif + /* Zbb */ + RISCVI_ANDN = 0x40007033, + RISCVI_ORN = 0x40006033, + RISCVI_XNOR = 0x40004033, + + RISCVI_CLZ = 0x60001013, + RISCVI_CTZ = 0x60101013, + + RISCVI_CPOP = 0x60201013, + + RISCVI_MAX = 0x0a006033, + RISCVI_MAXU = 0x0a007033, + RISCVI_MIN = 0x0a004033, + RISCVI_MINU = 0x0a005033, + + RISCVI_SEXT_B = 0x60401013, + RISCVI_SEXT_H = 0x60501013, +#if LJ_TARGET_RISCV64 + RISCVI_ZEXT_H = 0x0800403b, +#endif + + RISCVI_ROL = 0x60001033, + RISCVI_ROR = 0x60005033, + RISCVI_RORI = 0x60005013, + + RISCVI_ORC_B = 0x28705013, + +#if 
LJ_TARGET_RISCV64 + RISCVI_REV8 = 0x6b805013, + + RISCVI_CLZW = 0x6000101b, + RISCVI_CTZW = 0x6010101b, + + RISCVI_CPOPW = 0x6020101b, + + RISCVI_ROLW = 0x6000103b, + RISCVI_RORIW = 0x6000501b, + RISCVI_RORW = 0x6000503b, +#endif + /* NYI: Zbc, Zbs */ + + /* --- Zicond --- */ + RISCVI_CZERO_EQZ = 0x0e005033, + RISCVI_CZERO_NEZ = 0x0e007033, + + /* --- Zfa --- */ + RISCVI_FLI_S = 0xf0100053, + RISCVI_FMINM_S = 0x28002053, + RISCVI_FMAXM_S = 0x28003053, + RISCVI_FROUND_S = 0x40400053, + RISCVI_FROUNDNX_S = 0x40500053, + RISCVI_FCVTMOD_W_D = 0xc2801053, + RISCVI_FLEQ_S = 0xa0004053, + RISCVI_FLTQ_S = 0xa0005053, + RISCVI_FLI_D = 0xf2100053, + RISCVI_FMINM_D = 0x2a002053, + RISCVI_FMAXM_D = 0x2a003053, + RISCVI_FROUND_D = 0x42400053, + RISCVI_FROUNDNX_D = 0x42500053, + RISCVI_FLEQ_D = 0xa2004053, + RISCVI_FLTQ_D = 0xa2005053, + + RISCVI_FROUND_S_RTZ = 0x40401053, + RISCVI_FROUND_S_RDN = 0x40402053, + RISCVI_FROUND_S_RUP = 0x40403053, + RISCVI_FROUNDNX_S_RTZ = 0x40501053, + RISCVI_FROUNDNX_S_RDN = 0x40502053, + RISCVI_FROUNDNX_S_RUP = 0x40503053, + RISCVI_FROUND_D_RTZ = 0x42401053, + RISCVI_FROUND_D_RDN = 0x42402053, + RISCVI_FROUND_D_RUP = 0x42403053, + RISCVI_FROUNDNX_D_RTZ = 0x42501053, + RISCVI_FROUNDNX_D_RDN = 0x42502053, + RISCVI_FROUNDNX_D_RUP = 0x42503053, + + /* TBD: RVV?, RVP?, RVJ? 
*/ + + /* --- XThead* --- */ + /* XTHeadBa */ + RISCVI_TH_ADDSL = 0x0000100b, + + /* XTHeadBb */ + RISCVI_TH_SRRI = 0x1000100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_SRRIW = 0x1400100b, +#endif + RISCVI_TH_EXT = 0x0000200b, + RISCVI_TH_EXTU = 0x0000300b, + RISCVI_TH_FF0 = 0x8400100b, + RISCVI_TH_FF1 = 0x8600100b, + RISCVI_TH_REV = 0x8200100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_REVW = 0x9000100b, +#endif + RISCVI_TH_TSTNBZ = 0x8000100b, + + /* XTHeadBs */ + RISCVI_TH_TST = 0x8800100b, + + /* XTHeadCondMov */ + RISCVI_TH_MVEQZ = 0x4000100b, + RISCVI_TH_MVNEZ = 0x4200100b, + + /* XTHeadMac */ + RISCVI_TH_MULA = 0x2000100b, + RISCVI_TH_MULAH = 0x2800100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_MULAW = 0x2400100b, +#endif + RISCVI_TH_MULS = 0x2200100b, + RISCVI_TH_MULSH = 0x2a00100b, + RISCVI_TH_MULSW = 0x2600100b, /* NOTE(review): unlike RISCVI_TH_MULAW this is not under #if LJ_TARGET_RISCV64 -- confirm this is intended. */ + + /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */ +} RISCVIns; + +typedef enum RISCVRM { /* Floating-point rounding modes; the values follow the RISC-V rm field encoding. */ + RISCVRM_RNE = 0, /* Round to nearest, ties to even. */ + RISCVRM_RTZ = 1, /* Round towards zero. */ + RISCVRM_RDN = 2, /* Round down. */ + RISCVRM_RUP = 3, /* Round up. */ + RISCVRM_RMM = 4, /* Round to nearest, ties to max magnitude. */ + RISCVRM_DYN = 7, /* Dynamic rounding mode. */ +} RISCVRM; + +static const uint16_t riscv_fli_map_hi16[32] = { /* Upper 16 bits of the IEEE 754 binary64 bit pattern of the constant named next to each entry (e.g. 0x3ff0 for 1.0). Presumably indexed by the 5-bit constant selector of the Zfa FLI instructions -- confirm against the users of this table. */ + 0xbff0u, // -1 + 0x0010u, // min (smallest positive normal double, 2^-1022) + 0x3ef0u, // 2^-16 + 0x3f00u, // 2^-15 + 0x3f70u, // 2^-8 + 0x3f80u, // 2^-7 + 0x3fb0u, // 2^-4 + 0x3fc0u, // 2^-3, 0.125 + 0x3fd0u, // 2^-2, 0.25 + 0x3fd4u, // 0.3125 + 0x3fd8u, // 0.375 + 0x3fdcu, // 0.4375 + 0x3fe0u, // 0.5 + 0x3fe4u, // 0.625 + 0x3fe8u, // 0.75 + 0x3fecu, // 0.875 + 0x3ff0u, // 1 + 0x3ff4u, // 1.25 + 0x3ff8u, // 1.5 + 0x3ffcu, // 1.75 + 0x4000u, // 2 + 0x4004u, // 2.5 + 0x4008u, // 3 + 0x4010u, // 4 + 0x4020u, // 8 + 0x4030u, // 16 + 0x4060u, // 128 + 0x4070u, // 256 + 0x40e0u, // 2^15, 32768 + 0x40f0u, // 2^16, 65536 + 0x7ff0u, // inf + 0x7ff8u, // canonical nan +}; + +#endif diff --git a/src/lj_vm.h b/src/lj_vm.h index 63d094396..4b1cbf1d9 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); #if LJ_TARGET_PPC void lj_vm_cachesync(void *start, void *end); #endif +#if
LJ_TARGET_RISCV64 +void lj_vm_fence_rw_rw(void); /* Explicit (void) prototype: takes no arguments, returns nothing. NOTE(review): the definition is outside this view; presumably the assembler VM issues a "fence rw,rw" -- confirm. */ +#endif LJ_ASMF double lj_vm_foldarith(double x, double y, int op); #if LJ_HASJIT LJ_ASMF double lj_vm_foldfpm(double x, int op); diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 3351e72b4..1ee32d018 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op) /* -- Helper functions for generated machine code ------------------------- */ -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ + || LJ_TARGET_RISCV64 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc new file mode 100644 index 000000000..67f8f2c0a --- /dev/null +++ b/src/vm_riscv64.dasc @@ -0,0 +1,4813 @@ +|// Low-level VM code for RISC-V 64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +|// +|// Contributed by gns from PLCT Lab, ISCAS. +| +|.arch riscv64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp +| +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, x18 // Base of current Lua stack frame. +|.define KBASE, x19 // Constants of current Lua function. +|.define PC, x20 // Next PC. +|.define GLREG, x21 // Global state. +|.define DISPATCH, x22 // Opcode dispatch table. +|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
+|.define MULTRES, x24 // Size of multi-result: (nresults+1)*8. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, x8 // Set to LJ_TNIL by the VM entry code. +|.define TISNUM, x25 // Set to LJ_TISNUM by the VM entry code. +|.define TOBIT, f27 // 2^52 + 2^51. +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, x9 // Callee-save. +|.define RB, x14 // Same register as CARG5. +|.define RC, x15 // Same register as CARG6. +|.define RD, x16 // Same register as CARG7. +|.define INS, x17 // Same register as CARG8. +| +|.define TMP0, x6 +|.define TMP1, x7 +|.define TMP2, x28 +|.define TMP3, x29 +|.define TMP4, x30 +| +|// RISC-V lp64d calling convention. +|.define CFUNCADDR, x5 // Scratch register holding the callee address in call_intern/call_extern. +|.define CARG1, x10 +|.define CARG2, x11 +|.define CARG3, x12 +|.define CARG4, x13 +|.define CARG5, x14 +|.define CARG6, x15 +|.define CARG7, x16 +|.define CARG8, x17 +| +|.define CRET1, x10 // Same register as CARG1. +|.define CRET2, x11 // Same register as CARG2. +| +|.define FARG1, f10 +|.define FARG2, f11 +|.define FARG3, f12 +|.define FARG4, f13 +|.define FARG5, f14 +|.define FARG6, f15 +|.define FARG7, f16 +|.define FARG8, f17 +| +|.define FRET1, f10 // Same register as FARG1. +|.define FRET2, f11 // Same register as FARG2. +| +|.define FTMP0, f0 +|.define FTMP1, f1 +|.define FTMP2, f2 +|.define FTMP3, f3 +|.define FTMP4, f4 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|// RISC-V 64 lp64d. +| +|.define CFRAME_SPACE, 256 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 252 // 32 bit values. +|.define SAVE_NRES, 248 +|.define SAVE_CFRAME, 240 // 64 bit values. +|.define SAVE_L, 232 +|.define SAVE_PC, 224 +|//----- 16 byte aligned +|// Padding 216 +|.define SAVE_GPR_, 112 // .. 112+13*8: 64 bit GPR saves (ra, x18-x27, x8, x9 in saveregs). +|.define SAVE_FPR_, 16 // .. 16+12*8: 64 bit FPR saves (f18-f27, f8, f9 in saveregs).
+| +| +|.define TMPD, 0 // Offset 0 from sp; same value as TMPD_OFS. +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|//----------------------------------------------------------------------- +| +|.macro saveregs // Allocate CFRAME_SPACE bytes on the stack, then spill f8-f9, f18-f27, ra, x8-x9 and x18-x27. +| addi sp, sp, -CFRAME_SPACE +| fsd f27, SAVE_FPR_+11*8(sp) +| fsd f26, SAVE_FPR_+10*8(sp) +| fsd f25, SAVE_FPR_+9*8(sp) +| fsd f24, SAVE_FPR_+8*8(sp) +| fsd f23, SAVE_FPR_+7*8(sp) +| fsd f22, SAVE_FPR_+6*8(sp) +| fsd f21, SAVE_FPR_+5*8(sp) +| fsd f20, SAVE_FPR_+4*8(sp) +| fsd f19, SAVE_FPR_+3*8(sp) +| fsd f18, SAVE_FPR_+2*8(sp) +| fsd f9, SAVE_FPR_+1*8(sp) +| fsd f8, SAVE_FPR_+0*8(sp) +| sd ra, SAVE_GPR_+12*8(sp) +| sd x27, SAVE_GPR_+11*8(sp) +| sd x26, SAVE_GPR_+10*8(sp) +| sd x25, SAVE_GPR_+9*8(sp) +| sd x24, SAVE_GPR_+8*8(sp) +| sd x23, SAVE_GPR_+7*8(sp) +| sd x22, SAVE_GPR_+6*8(sp) +| sd x21, SAVE_GPR_+5*8(sp) +| sd x20, SAVE_GPR_+4*8(sp) +| sd x19, SAVE_GPR_+3*8(sp) +| sd x18, SAVE_GPR_+2*8(sp) +| sd x9, SAVE_GPR_+1*8(sp) +| sd x8, SAVE_GPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret // Reload every register stored by saveregs from the same slots, release the frame and return. +| ld ra, SAVE_GPR_+12*8(sp) +| ld x27, SAVE_GPR_+11*8(sp) +| ld x26, SAVE_GPR_+10*8(sp) +| ld x25, SAVE_GPR_+9*8(sp) +| ld x24, SAVE_GPR_+8*8(sp) +| ld x23, SAVE_GPR_+7*8(sp) +| ld x22, SAVE_GPR_+6*8(sp) +| ld x21, SAVE_GPR_+5*8(sp) +| ld x20, SAVE_GPR_+4*8(sp) +| ld x19, SAVE_GPR_+3*8(sp) +| ld x18, SAVE_GPR_+2*8(sp) +| ld x9, SAVE_GPR_+1*8(sp) +| ld x8, SAVE_GPR_+0*8(sp) +| fld f27, SAVE_FPR_+11*8(sp) +| fld f26, SAVE_FPR_+10*8(sp) +| fld f25, SAVE_FPR_+9*8(sp) +| fld f24, SAVE_FPR_+8*8(sp) +| fld f23, SAVE_FPR_+7*8(sp) +| fld f22, SAVE_FPR_+6*8(sp) +| fld f21, SAVE_FPR_+5*8(sp) +| fld f20, SAVE_FPR_+4*8(sp) +| fld f19, SAVE_FPR_+3*8(sp) +| fld f18, SAVE_FPR_+2*8(sp) +| fld f9, SAVE_FPR_+1*8(sp) +| fld f8, SAVE_FPR_+0*8(sp) +| addi sp, sp, CFRAME_SPACE +| ret // Returns via the ra value reloaded above. +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Pseudo-instruction macros +|// Be cautious with local label 9: the bx* macros define it and branch forward to it.
+|.macro bxeq, a, b, tgt // Jump to tgt if a == b. All bx* macros invert the condition to skip over an unconditional j (presumably so tgt may lie beyond conditional-branch range -- confirm). +| bne a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxne, a, b, tgt // Jump to tgt if a != b. +| beq a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxlt, a, b, tgt // Jump to tgt if a < b (signed). +| bge a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxge, a, b, tgt // Jump to tgt if a >= b (signed). +| blt a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgt, a, b, tgt // Jump to tgt if a > b (signed). +| bge b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxle, a, b, tgt // Jump to tgt if a <= b (signed). +| blt b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxltu, a, b, tgt // Jump to tgt if a < b (unsigned). +| bgeu a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgeu, a, b, tgt // Jump to tgt if a >= b (unsigned). +| bltu a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgtu, a, b, tgt // Jump to tgt if a > b (unsigned). +| bgeu b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxleu, a, b, tgt // Jump to tgt if a <= b (unsigned). +| bltu b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxeqz, a, tgt // Jump to tgt if a == 0. +| bxeq a, x0, tgt +|.endmacro +| +|.macro bxnez, a, tgt // Jump to tgt if a != 0. +| bxne a, x0, tgt +|.endmacro +| +|.macro bxlez, a, tgt // Jump to tgt if a <= 0 (signed). +| bxge x0, a, tgt +|.endmacro +| +|.macro bxgez, a, tgt // Jump to tgt if a >= 0 (signed). +| bxge a, x0, tgt +|.endmacro +| +|.macro bxltz, a, tgt // Jump to tgt if a < 0 (signed). +| bxlt a, x0, tgt +|.endmacro +| +|.macro bxgtz, a, tgt // Jump to tgt if a > 0 (signed). +| bxlt x0, a, tgt +|.endmacro +| +|.macro lxi, a, b // a = low 20 bits of b, sign-extended from bit 19. +| lui a, (b)&0xfffff +| srai a, a, 12 +|.endmacro +| +|.macro lzi, a, b // a = low 20 bits of b. NOTE(review): on RV64 lui sign-extends its 32 bit result, so when bit 19 of b is set the srli below leaves bits 20..51 set; srliw would zero-extend -- confirm no caller passes such a value. +| lui a, (b)&0xfffff +| srli a, a, 12 +|.endmacro +| +|.macro addxi, a, b, c // a = b + (low 20 bits of c, sign-extended). Clobbers x31. +| lui x31, (c)&0xfffff +| srai x31, x31, 12 +| add a, x31, b +|.endmacro +| +|.macro sext.b, a, b // a = low 8 bits of b, sign-extended. +| slli a, b, 56 +| srai a, a, 56 +|.endmacro +| +|.macro sext.h, a, b // a = low 16 bits of b, sign-extended. +| slli a, b, 48 +| srai a, a, 48 +|.endmacro +| +|.macro zext.h, a, b // a = low 16 bits of b, zero-extended. +| slli a, b, 48 +| srli a, a, 48 +|.endmacro +| +|.macro zext.w, a, b // a = low 32 bits of b, zero-extended. +| slli a, b, 32 +| srli a, a, 32 +|.endmacro +| +|.macro bfextri, a, b, c, d // a = bits d..c of b (d = lowest bit, c = highest bit), zero-extended. +| slli a, b, (63-c) +| srli a, a, (d+63-c) +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG +|.type GL, global_State, GLREG +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; .long 0x00100073; .endmacro // 0x00100073 is the encoding of ebreak. +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. RISC-V only has little endian ABI for now. +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +| +|// Instruction decode. Each decode_X8 is split into an "a" and a "b" step so callers can interleave them with other instructions (see ins_NEXT2); combined they yield the field value times 8. +|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro +|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro +|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro +| +|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro +|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro +|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro +|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro +|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro +|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro +|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro +|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro +|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro +| +|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro // dst = (ins & 0xff) * 8. +|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro // dst = ((ins >> 8) & 0xff) * 8. +|.macro
decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro // dst = ((ins >> 24) & 0xff) * 8. +|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro // dst = ((ins >> 16) & 0xff) * 8. +|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro // dst = (ins >> 16) * 8. +| +|// Instruction fetch. +|.macro ins_NEXT1 // INS = 32 bit word at PC; PC += 4. +| lw INS, 0(PC) +| addi PC, PC, 4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 // Loads the handler from DISPATCH + opcode*8, leaves RA and RD decoded (times 8) and jumps to it. Clobbers TMP0, TMP1, TMP4. +| decode_OP8 TMP1, INS +| add TMP0, DISPATCH, TMP1 +| decode_RD8a RD, INS +| ld TMP4, 0(TMP0) +| decode_RA8a RA, INS +| decode_RD8b RD +| decode_RA8b RA +| jr TMP4 +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| j ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| j ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ld PC, LFUNC:RB->pc +| lw INS, 0(PC) +| addi PC, PC, 4 +| decode_OP8 TMP1, INS +| decode_RA8 RA, INS +| add TMP0, DISPATCH, TMP1 +| ld TMP0, 0(TMP0) +| add RA, RA, BASE // Unlike ins_NEXT2, RA becomes an address (BASE + A*8) and RD is not decoded. +| jr TMP0 +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| sd PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD // PC += (RD >> 1) + the sign-extended lui constant; clobbers TMP0 and TMP4. The constant equals -(BCBIAS_J*4) provided BCBIAS_J*4 is a multiple of 4096 -- TODO confirm. +| srliw TMP0, RD, 1 +| lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff +| addw TMP0, TMP0, TMP4 +| add PC, PC, TMP0 +|.endmacro +| +|// Assumes J is relative to GL. Some J members might be out of range though.
+#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) /* GG_G2J plus the offset of field inside jit_State. */ +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) /* Offset of a GCproto field minus sizeof(GCproto); cont_dispatch applies it to the pointer loaded from LFUNC->pc. */ +| +|.macro call_intern, curfunc, func // Defines global label curfunc_pcrel_func, then calls func via an auipc/jalr pair using pcrel_hi/pcrel_lo relocations. Clobbers CFUNCADDR. NOTE(review): each (curfunc, func) pair presumably must be unique because it names a global label -- confirm. +|->curfunc .. _pcrel_ .. func: +| auipc CFUNCADDR, extern %pcrel_hi(func) +| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) +|.endmacro +|.macro call_extern, curfunc, func // Like call_intern, but loads the target address with ld through a got_pcrel_hi/pcrel_lo pair before jalr. Clobbers CFUNCADDR. +|->curfunc .. _got_pcrel_ .. func: +| auipc CFUNCADDR, extern %got_pcrel_hi(func) +| ld CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _got_pcrel_ .. func)(CFUNCADDR) +| jalr CFUNCADDR +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro // TMP0 = ~LJ_VMST_<st>. +|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro // Stores the TMP0 value prepared by li_vmstate. +| +|.macro hotcheck, delta, target // Subtracts delta from the 16 bit counter selected by PC and jumps to target if the result is negative. Clobbers TMP1, TMP2. +| srli TMP1, PC, 1 +| andi TMP1, TMP1, 126 // Even byte offset 0..126, i.e. one of 64 halfword slots. +| add TMP1, TMP1, DISPATCH +| lhu TMP2, GG_DISP2HOT(TMP1) +| addiw TMP2, TMP2, -delta +| sh TMP2, GG_DISP2HOT(TMP1) +| bxltz TMP2, target +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target // Makes tab the new head of GL->gc.grayagain (old head goes to tab->gclist), stores the updated mark byte and jumps to target. +| ld tmp, GL->gc.grayagain +| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) +| sd tab, GL->gc.grayagain +| sb mark, tab->marked +| sd tmp, tab->gclist +| j target +|.endmacro +| +|// Clear type tag. Isolate lowest 64-17=47 bits of reg. +|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro +|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [47, 63] of dst.
+|.macro settp_a, dst; cleartp dst; .endmacro +|.macro settp_a, dst, src; cleartp dst, src; .endmacro +|.macro settp_b, dst, tp; +| slli x31, tp, 47 // Clobbers x31. +| or dst, dst, x31 +|.endmacro +|.macro settp_b, dst, src, tp; +| slli x31, tp, 47 // Clobbers x31. +| or dst, src, x31 +|.endmacro +|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro // dst = (dst with upper 17 bits cleared) | (tp << 47). Clobbers x31. +|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro // Same, taking the payload from src. +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; srai dst, src, 47; .endmacro // Arithmetic shift, so the tag is sign-extended. +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target // Jumps to target unless gettp(reg) + tp == 0. Clobbers TMP4; the tag of reg is cleared before the branch, so also on failure. +| gettp TMP4, reg +| addi TMP4, TMP4, tp +| cleartp reg +| bxnez TMP4, target +|.endmacro +|.macro checktp, dst, reg, tp, target // As above, but the untagged value goes to dst and reg is left unchanged. +| gettp TMP4, reg +| addi TMP4, TMP4, tp +| cleartp dst, reg +| bxnez TMP4, target +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target // Jumps to target unless the tag of reg equals TISNUM. Clobbers TMP4; reg is not modified. +| gettp TMP4, reg +| bxne TMP4, TISNUM, target +|.endmacro +|.macro checknum, reg, target // Jumps to target unless the tag of reg is below LJ_TISNUM (unsigned compare). Clobbers TMP4; reg is not modified. +| gettp TMP4, reg +| sltiu TMP4, TMP4, LJ_TISNUM +| bxeqz TMP4, target +|.endmacro +| +|.macro mov_false, reg // reg = ~(1 << 47). +| li reg, 0x001 +| slli reg, reg, 47 +| not reg, reg +|.endmacro +|.macro mov_true, reg // reg = ~(1 << 48). +| li reg, 0x001 +| slli reg, reg, 48 +| not reg, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction.
*/ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi TMP0, PC, FRAME_P + | + | // Return from pcall or xpcall fast func. + | mov_true TMP1 + | beqz TMP0, ->cont_dispatch + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | mv BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | addi RA, RA, -8 + | + |->vm_returnc: + | addiw RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | li CRET1, LUA_YIELD + | beqz RD, ->vm_unwind_c_eh + | mv MULTRES, RD + | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | andi TMP2, PC, ~FRAME_TYPEP + | xori TMP0, TMP0, FRAME_C + | sub TMP2, BASE, TMP2 // TMP2 = previous base. + | bnez TMP0, ->vm_returnp + | + | addiw TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES(sp) + | addi BASE, BASE, -16 + | st_vmstate + | slliw TMP2, TMP2, 3 + | beqz TMP1, >2 + |1: + | addiw TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(BASE) + | addi BASE, BASE, 8 + | bnez TMP1, <1 + | + |2: + | bne TMP2, RD, >6 + |3: + | sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame. + | mv CRET1, x0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | blt TMP2, RD, >7 + | // More results wanted. Check stack size and fill up results with nil. 
+ | bge BASE, TMP1, >9 + | sd TISNIL, 0(BASE) + | addiw RD, RD, 8 + | addi BASE, BASE, 8 + | j <2 + | + |7: // Less results wanted. + | subw TMP0, RD, TMP2 + | sub TMP0, BASE, TMP0 // Either keep top or shrink it. + | beqz TMP2, >8 + | mv BASE, TMP0 // LUA_MULTRET+1 case + |8: + | j <3 + | + |9: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | + | sd BASE, L->top // Save current top held in BASE (yes). + | mv MULTRES, RD + | srliw CARG2, TMP2, 3 + | mv CARG1, L + | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) + | lw TMP2, SAVE_NRES(sp) + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | mv RD, MULTRES + | slliw TMP2, TMP2, 3 + | j <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mv sp, CARG1 + | mv CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | li TMP0, ~LJ_VMST_C + | ld GL, L->glref + | sw TMP0, GL->vmstate + | j ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | andi sp, CARG1, CFRAME_RAWMASK + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld GL, L->glref // Setup pointer to global state. + | slli TMP3, TMP3, 32 + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | fmv.d.x TOBIT, TMP3 + | addi RA, BASE, -8 // Results start at BASE-8. + | addxi DISPATCH, GL, GG_G2DISP + | sd TMP1, -8(BASE) // Prepend false to error message. + | st_vmstate + | li RD, 16 // 2 results: false + error message. 
+ | j ->vm_returnc + | + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | li CARG2, LUA_MINSTACK + | j >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | sd BASE, L->base + | addi PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srliw CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | sub RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mv L, CARG1 + | ld GL, L->glref // Setup pointer to global state. + | mv BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L(sp) + | li PC, FRAME_CP + | addi TMP0, sp, CFRAME_RESUME + | addxi DISPATCH, GL, GG_G2DISP + | sw x0, SAVE_NRES(sp) + | sw x0, SAVE_ERRF(sp) + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd x0, SAVE_CFRAME(sp) + | sd TMP0, L->cframe + | beqz TMP1, >3 + | + | // Resume after yield (like a return). 
+ | sd L, GL->cur_L + | mv RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | sub RD, TMP1, BASE + | slli TMP3, TMP3, 32 + | sb x0, L->status + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | addi RD, RD, 8 + | st_vmstate + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | beqz TMP0, ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF(sp) + | li PC, FRAME_CP + | j >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | mv L, CARG1 + | sw CARG3, SAVE_NRES(sp) + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_L(sp) + | mv BASE, CARG2 + | addxi DISPATCH, GL, GG_G2DISP + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, GL->cur_L + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). 
+ | ld TMP1, L->top + | slli TMP3, TMP3, 32 + | add PC, PC, BASE + | sub NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | sub PC, PC, TMP2 // PC = frame delta + frame type + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mv L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L(sp) + | ld TMP1, L->top + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | addxi DISPATCH, GL, GG_G2DISP + | sw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. + | sw x0, SAVE_ERRF(sp) // No error function. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, GL->cur_L + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | mv BASE, CRET1 + | li PC, FRAME_CP + | bnez CRET1, <3 // Else continue with the call. + | j ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | mv RB, BASE + | mv BASE, TMP2 // Restore caller BASE. 
+ | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + | ld PC, -24(RB) // Restore PC from [cont|PC]. + |.if FFI + | sltiu TMP3, TMP0, 2 + |.endif + | cleartp LFUNC:TMP1 + | add TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + | sd TISNIL, -8(TMP2) // Ensure one valid arg. + |.if FFI + | bnez TMP3, >1 + |.endif + | // BASE = base, RA = resultptr, RB = meta base + | ld KBASE, PC2PROTO(k)(TMP1) + | jr TMP0 // Jump to continuation. + | + |.if FFI + |1: + | addi TMP1, RB, -32 + | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub RC, TMP1, BASE + | j ->vm_call_tail + |.endif + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | addi CARG2, RB, -32 + | ld TMP0, 0(RA) + | decode_RB8 MULTRES, INS + | decode_RA8 RA, INS + | add TMP1, BASE, MULTRES + | sd BASE, L->base + | sub CARG3, CARG2, TMP1 + | sd TMP0, 0(CARG2) + | bxne TMP1, CARG2, ->BC_CAT_Z + | add RA, BASE, RA + | sd TMP0, 0(RA) + | j ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | addi CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tgetv, lj_meta_tget + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + | ld TMP0, 0(CRET1) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. 
+ | // BASE = base, L->top = new base, stack = cont/func/t/k + | addi TMP1, BASE, -FRAME_CONT + | li NARGS8:RC, 16 // 2 args for func(t, k). + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mv TMP1, TISNIL + | bxeqz CRET1, ->BC_TGETR_Z + | ld TMP1, 0(CRET1) + | j ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | addi, CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tsetv, lj_meta_tset + | // Returns TValue * (finished) or NULL (metamethod). + | ld TMP2, 0(RA) + | beqz CRET1, >3 + | ins_next1 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | sd TMP2, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | addi TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 24 // 3 args for func(t, k, v) + | cleartp LFUNC:RB + | sd TMP2, 16(BASE) // Copy value to third argument. 
+ | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCtab *t, int32_t key) + | call_intern vmeta_tsetr, lj_tab_setinth + | // Returns TValue *. + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | mv CARG2, RA + | mv CARG3, RD + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | decode_OP1 CARG4, INS + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o1, *o2, int op) + | call_intern vmeta_comp, lj_meta_comp + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 + | beqz TMP1, ->vmeta_binop + | negw TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | addi PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff + | slliw RD, RD, 2 + | addw RD, RD, TMP1 + | and RD, RD, TMP2 + | add PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld TMP2, 0(RA) + | slliw TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP2, 0(TMP1) + | j ->cont_nop + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | negw TMP2, TMP1 // Branch if result is true. + | j <4 + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | addiw TMP2, TMP1, -1 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | mv CARG4, TMP0 + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCobj *o1, *o2, int ne) + | call_intern vmeta_equal, lj_meta_equal + | // Returns 0/1 or TValue * (metamethod). 
+ | j <3 + | + |->vmeta_equal_cd: + |.if FFI + | addi PC, PC, -4 + | mv CARG1, L + | mv CARG2, INS + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | j <3 + |.endif + | + |->vmeta_istype: + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, BCReg tp) + | call_intern vmeta_istype, lj_meta_istype + | j ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | mv RC, RB // Unary op: rc = rb, then fall through to the binary arith path. + | + |->vmeta_arith: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv CARG3, RB + | mv CARG4, RC + | decode_OP1 CARG5, INS + | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). + | beqz CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | mv TMP2, BASE + | addi PC, TMP1, FRAME_CONT + | mv BASE, CRET1 + | li NARGS8:RC, 16 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | mv MULTRES, CARG1 // Keep CARG1 across the call; restored before ->BC_LEN_Z. +#endif + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bnez CRET1, ->vmeta_binop // Binop call for compatibility. + | mv CARG1, MULTRES + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod.
+ | // TMP2 = old base, BASE = new base, RC = nargs*8 + | mv CARG1, L + | sd TMP2, L->base // This is the callers base! + | addi CARG2, BASE, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, BASE, RC + | mv MULTRES, NARGS8:RC // Keep nargs*8 across the call. + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_call, lj_meta_call + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mv CARG1, L + | sd BASE, L->base + | addi CARG2, RA, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, RA, RC + | mv MULTRES, NARGS8:RC + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_callt, lj_meta_call + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:CARG3, RB + | j ->BC_CALLT_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS // Keep the instruction word; it is re-decoded from MULTRES after the call. + | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + |.if JIT + | decode_OP1 TMP0, MULTRES + | li TMP1, BC_JFORI + |.endif + | decode_RA8 RA, MULTRES + | decode_RD8 RD, MULTRES + |.if JIT + | bxeq TMP0, TMP1, =>BC_JFORI + |.endif + | j =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback // No arguments: take the fallback. + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ ..
name: + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | bxnez TMP0, ->fff_fallback // Fewer than 2 arguments (nargs*8 < 16). + |.endmacro + | + |.macro .ffunc_n, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bnez TMP0, ->fff_fallback + | gettp TMP1, CARG1 + | gettp TMP2, CARG2 + | sltiu TMP1, TMP1, LJ_TISNUM + | sltiu TMP2, TMP2, LJ_TISNUM + | fld FARG1, 0(BASE) + | and TMP1, TMP1, TMP2 + | fld FARG2, 8(BASE) + | beqz TMP1, ->fff_fallback // Fallback unless both type tags are below LJ_TISNUM. + |.endmacro + | + |// Inlined GC threshold check. + |.macro ffgccheck + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | bltu TMP0, TMP1, >1 + | jal ->fff_gcstep // Called with a link in ra; ->fff_gcstep ends with ret. + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp TMP1, CARG1 + | sltiu TMP1, TMP1, LJ_TISTRUECOND + | addi RA, BASE, -16 + | bxeqz TMP1, ->fff_fallback + | ld PC, FRAME_PC(BASE) + | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | addi TMP1, BASE, 8 + | add TMP2, RA, RD + | sd CARG1, -16(BASE) + | bne BASE, TMP2, >1 + | j ->fff_res // Done if exactly 1 argument.
+ |1: + | ld TMP0, 0(TMP1) + | sd TMP0, -16(TMP1) // Copy each remaining argument 16 bytes down into the result area. + | mv TMP3, TMP1 + | addi TMP1, TMP1, 8 + | bne TMP3, TMP2, <1 + | j ->fff_res + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | not TMP3, TMP0 + | bltu TISNUM, TMP0, >1 + | li TMP3, ~LJ_TISNUM + |1: + | slli TMP3, TMP3, 3 + | add TMP3, CFUNC:RB, TMP3 + | ld CARG1, CFUNC:TMP3->upvalue + | j ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | addi TMP0, TMP2, -LJ_TTAB + | addi TMP1, TMP2, -LJ_TUDATA + | snez TMP0, TMP0 + | neg TMP0, TMP0 + | and TMP0, TMP0, TMP1 // TMP0 == 0 iff the tag is LJ_TTAB or LJ_TUDATA. + | cleartp TAB:CARG1 + | bnez TMP0, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | li CARG1, LJ_TNIL + | beqz TAB:RB, ->fff_restv + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slli TMP0, TMP1, 5 + | slli TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | li TMP3, LJ_TTAB + | beq RC, TMP0, >5 + | bnez NODE:TMP2, <3 + |4: + | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: + | bne CARG1, TISNIL, ->fff_restv + | j <4 // Ditto for nil value. + | + |6: // Neither table nor userdata: look up the base metatable via the type tag. + | sltiu TMP3, TMP2, LJ_TISNUM + | neg TMP4, TMP3 + | xor TMP0, TMP2, TISNUM // TMP2 = TMP3 ? TISNUM : TMP2 + | and TMP0, TMP0, TMP4 + | xor TMP2, TMP0, TMP2 + | slli TMP2, TMP2, 3 + | sub TMP0, GL, TMP2 + | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0) + | j <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | addi TMP3, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or TMP3, TMP3, TAB:TMP0 // Nonzero if arg 2 is not a table or a metatable is already set. + | bxnez TMP3, ->fff_fallback + | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) + | sd TAB:CARG2, TAB:TMP1->metatable + | beqz TMP3, ->fff_restv + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | gettp TMP1, CARG2 + | cleartp CARG2 + | addi TMP1, TMP1, -LJ_TTAB + | or TMP0, TMP0, TMP1 // Nonzero if fewer than 2 args or arg 1 is not a table. + | addi CARG3, BASE, 8 + | bxnez TMP0, ->fff_fallback + | mv CARG1, L + | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ld CARG1, 0(CRET1) + | j ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP1, TISNUM, TMP1 + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback // Not exactly 1 arg, or tag of CARG1 is above TISNUM. + | j ->fff_restv + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | gettp TMP0, CARG1 + | addi TMP1, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beqz TMP1, ->fff_restv // String argument? Return it unchanged. + | // Handle numbers inline, unless a number base metatable is present. + | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] + | sltu TMP0, TISNUM, TMP0 + | sd BASE, L->base // Add frame since C call can throw. + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | ffgccheck + | mv CARG1, L + | mv CARG2, BASE + | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *.
+ | li TMP1, LJ_TSTR + | ld BASE, L->base + | settp CARG1, TMP1 + | j ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, -LJ_TTAB, ->fff_fallback + | add TMP0, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) + | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. + | addi CARG2, BASE, 8 + | addi CARG3, BASE, -16 + | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | li RD, (2+1)*8 + | bgtz CRET1, ->fff_res // Found key/value. + | mv TMP1, CRET1 // Keep the status in TMP1 for the test below; CARG1 is set to nil next. + | mv CARG1, TISNIL + | beqz TMP1, ->fff_restv // End of traversal: return nil. + | ld CFUNC:RB, FRAME_FUNC(BASE) + | li RC, 2*8 + | cleartp CFUNC:RB + | j ->fff_fallback // Invalid key. + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bxnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | ld PC, FRAME_PC(BASE) + | sext.w TMP2, CARG2 + | addiw TMP2, TMP2, 1 // Next index = previous index + 1. + | sltu TMP3, TMP2, TMP0 + | zext.w TMP0, TMP2 + | settp_b TMP0, TISNUM + | sd TMP0, -16(BASE) // First result slot: the new index, tagged as integer. + | beqz TMP3, >2 // Not in array part? + | slli TMP3, TMP2, 3 + | add TMP3, TMP1, TMP3 + | ld TMP1, 0(TMP3) + |1: + | li RD, (0+1)*8 + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + | sd TMP1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function.
+ | lw TMP0, TAB:CARG1->hmask + | li RD, (0+1)*8 + | beqz TMP0, ->fff_res + | mv CARG2, TMP2 + | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 + | beqz CRET1, ->fff_res + | ld TMP1, 0(CRET1) + | j <1 + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable +#endif + | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] +#if LJ_52 + | bxnez TAB:TMP2, ->fff_fallback +#endif + | slli TMP1, TISNUM, 47 + | sd CARG1, -8(BASE) + | sd TMP1, 0(BASE) + | sd CFUNC:TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -8 + | lbu TMP3, GL->hookmask + | mv TMP2, BASE + | bxltz NARGS8:TMP0, ->fff_fallback + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 16 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | addi PC, TMP3, 16+FRAME_PCALL + | beqz NARGS8:RC, ->vm_call_dispatch + |1: + | add TMP0, BASE, NARGS8:RC + |2: + | ld TMP1, -16(TMP0) + | sd TMP1, -8(TMP0) + | addi TMP0, TMP0, -8 + | bne TMP0, BASE, <2 + | j ->vm_call_dispatch + | + |.ffunc xpcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | lbu TMP3, GL->hookmask // Must land in TMP3: it is shifted and masked below to form the hook-active bit in PC (same as pcall). + | bxltz NARGS8:TMP0, ->fff_fallback + | gettp TMP2, CARG2 + | addi TMP2, TMP2, -LJ_TFUNC + | bxnez TMP2, ->fff_fallback // Traceback must be a function. + | mv TMP2, BASE + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 24 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | sd CARG2, 0(TMP2) // Swap function and traceback.
+ | andi TMP3, TMP3, 1 + | sd CARG1, 8(TMP2) + | addi PC, TMP3, 24+FRAME_PCALL + | bnez NARGS8:RC, <1 // Reuse the argument-shifting loop at 1: in pcall. + | j ->vm_call_dispatch + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ld L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | lbu TMP0, L:CARG1->status + | ld TMP1, L:CARG1->cframe + | ld CARG2, L:CARG1->top + | ld TMP2, L:CARG1->base + | addiw CARG4, TMP0, -LUA_YIELD + | add CARG3, CARG2, TMP0 + | addi TMP3, CARG2, 8 + | seqz TMP4, CARG4 + | neg TMP4, TMP4 + | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3 + | and CARG2, CARG2, TMP4 + | xor CARG2, TMP3, CARG2 + | bgtz CARG4, ->fff_fallback // st > LUA_YIELD? + | xor TMP2, TMP2, CARG3 + | or CARG4, TMP2, TMP0 + | bnez TMP1, ->fff_fallback // cframe != 0? + | ld TMP0, L:CARG1->maxstack + | ld PC, FRAME_PC(BASE) + | beqz CARG4, ->fff_fallback // base == top && st == 0? + | add TMP2, CARG2, NARGS8:RC + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bltu TMP0, TMP2, ->fff_fallback // Stack overflow? + |1: + |.if resume + | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. + | addi NARGS8:RC, NARGS8:RC, -8 + | addi TMP2, TMP2, -8 + |.endif + | sd TMP2, L:CARG1->top + | sd BASE, L->top + | add TMP1, BASE, NARGS8:RC + | mv CARG3, CARG2 + |2: // Move args to coroutine. + | ld TMP0, 0(BASE) + | sltu TMP3, BASE, TMP1 + | addi BASE, BASE, 8 + | beqz TMP3, >3 + | sd TMP0, 0(CARG3) + | addi CARG3, CARG3, 8 + | j <2 + |3: + | mv L:RA, L:CARG1 + | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status.
+ |4: + | ld TMP2, L:RA->base + | sltiu TMP1, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, GL->cur_L + | st_vmstate + | sub RD, TMP3, TMP2 // RD = co->top - co->base, i.e. the size of the results in bytes. + | beqz TMP1, >8 // Status above LUA_YIELD: handle as error. + | ld TMP0, L->maxstack + | add TMP1, BASE, RD + | beqz RD, >6 // No results? + | add TMP3, TMP2, RD + | bltu TMP0, TMP1, >9 // Need to grow stack? + | sd TMP2, L:RA->top // Clear coroutine stack. + | mv TMP1, BASE + |5: // Move results from coroutine. + | ld TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | sd TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu TMP2, TMP3, <5 + |6: + |.if resume + | mov_true TMP1 + | addi RD, RD, 16 + |7: + | sd TMP1, -8(BASE) // Prepend true/false to results. + | addi RA, BASE, -8 + |.else + | mv RA, BASE + | addi RD, RD, 8 + |.endif + | andi TMP0, PC, FRAME_TYPE + | sd PC, SAVE_PC(sp) + | mv MULTRES, RD + |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use + | bnez TMP0, >6 + | j ->BC_RET_Z + |6: + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | addi TMP3, TMP3, -8 + | mov_false TMP1 + | li RD, (2+1)*8 + | ld TMP0, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | sd TMP0, 0(BASE) // Copy error message. + | j <7 + |.else + | mv CARG1, L + | mv CARG2, L:RA + | // (lua_State *L, lua_State *co) + | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err + |.endif + | + |9: // Handle stack expansion on return from yield.
+ | mv CARG1, L + | srliw CARG2, RD, 3 + | // (lua_State *L, int n) + |.if resume + | call_intern ff_coroutine_resume, lj_state_growstack + |.else + | call_intern ff_coroutine_wrap_aux, lj_state_growstack + |.endif + | mv CRET1, x0 // Re-enter the result handling at 4: with status 0. + | j <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ld TMP0, L->cframe + | add TMP1, BASE, NARGS8:RC + | li CRET1, LUA_YIELD + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top + | beqz TMP0, ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: take the fallback. + | sd x0, L->cframe + | sb CRET1, L->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func, rm + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | beqz NARGS8:RC, ->fff_fallback + | fmv.d.x FARG1, CARG1 + | beq TMP0, TISNUM, ->fff_restv + | srli TMP1, CARG1, 52 // Extract exponent (and sign). + | bgeu TMP0, TISNUM, ->fff_fallback + | andi TMP1, TMP1, 0x7ff // Extract exponent. + | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction + | beqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? + | fcvt.l.d TMP3, FARG1, rm + | fcvt.d.l FTMP1, TMP3 + | fsgnj.d FRET1, FTMP1, FARG1 // Result takes its sign from the original argument. + | j ->fff_resn + |.endmacro + | + | math_round floor, rdn + | math_round ceil, rup + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | addi TMP2, CARG2, -LJ_TISNUM + | sext.w TMP1, CARG1 + | bnez TMP2, >1 + | sraiw TMP0, TMP1, 31 // Extract sign (integer case). + | xor TMP1, TMP1, TMP0 + | sub CARG1, TMP1, TMP0 + | slli TMP3, CARG1, 32 + | settp CARG1, TISNUM + | bgez TMP3, ->fff_restv + | lui CARG1, 0x41e00 // 2^31 as a double. + | slli CARG1, CARG1, 32 + | j ->fff_restv + |1: + | sltiu TMP2, CARG2, LJ_TISNUM + | slli CARG1, CARG1, 1 + | srli CARG1, CARG1, 1 // Shift pair clears the top (sign) bit. + | beqz TMP2, ->fff_fallback // Tag is not below LJ_TISNUM: not a number. + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result.
+ | ld PC, FRAME_PC(BASE) + | sd CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi TMP0, PC, FRAME_TYPE + | mv MULTRES, RD + | addi RA, BASE, -16 // RA is (re)set here: results start at BASE-16. + | bxnez TMP0, ->vm_return + | lw INS, -4(PC) + | decode_RB8 RB, INS + |5: + | bltu RD, RB, >6 // More results expected? + | decode_RA8a TMP0, INS + | ins_next1 + | decode_RA8b TMP0 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, TMP0 + | ins_next2 + | + |6: // Fill up results with nil. + | add TMP1, RA, RD + | addi RD, RD, 8 + | sd TISNIL, -8(TMP1) + | j <5 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | call_extern ff_math_extern, func + | j ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | call_extern ff_math_extern2, func + | j ->fff_resn + |.endmacro + | + |.ffunc_n math_sqrt + | fsqrt.d FRET1, FARG1 + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | fsd FRET1, -16(BASE) // Store the FP result in the first result slot. + | j ->fff_res1 + | + |.ffunc math_log + | li TMP1, 8 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument.
+ | checknum CARG1, ->fff_fallback + | call_extern ff_math_log, log + | j ->fff_resn + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | fld FARG1, 0(BASE) + | lw CARG1, 8(BASE) + | call_extern ff_math_ldexp, ldexp // (double x, int exp) + | j ->fff_resn + | + |.ffunc_n math_frexp + | ld PC, FRAME_PC(BASE) + | addi CARG1, GL, offsetof(global_State, tmptv) + | call_extern ff_math_frexp, frexp + | lw TMP1, GL->tmptv + | fcvt.d.w FARG2, TMP1 + | fsd FRET1, -16(BASE) + | fsd FARG2, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.ffunc_n math_modf + | addi CARG1, BASE, -16 + | ld PC, FRAME_PC(BASE) + | call_extern ff_math_modf, modf + | fsd FRET1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.macro math_minmax, name, ismax + | .ffunc_1 name + | add RB, BASE, NARGS8:RC // RB = end of arguments. This overwrites CFUNC, so fallbacks go via 2: below. + | addi RA, BASE, 8 // RA = pointer to the next argument. + | checkint CARG1, >4 + |1: // Handle integers. + | ld CARG2, 0(RA) + | beq RA, RB, ->fff_restv + | sext.w CARG1, CARG1 + | checkint CARG2, >3 + | sext.w CARG2, CARG2 + | slt TMP0, CARG1, CARG2 + |.if ismax + | addi TMP1, TMP0, -1 + |.else + | neg TMP1, TMP0 + |.endif + | xor TMP2, CARG1, CARG2 // CARG1 = TMP1 ? CARG1 : CARG2 + | and TMP2, TMP2, TMP1 + | xor CARG1, CARG2, TMP2 + | addi RA, RA, 8 + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | j <1 + |3: // Convert intermediate result to number and continue below. + | fcvt.d.w FARG1, CARG1 + | checknum CARG2, >2 + | fld FARG2, 0(RA) + | j >6 + | + |4: + | fld FARG1, 0(BASE) + | checknum CARG1, >2 + |5: // Handle numbers.
+ | ld CARG2, 0(RA) + | fld FARG2, 0(RA) + | bgeu RA, RB, ->fff_resn + | checknum CARG2, >7 + |6: + |.if ismax + | flt.d TMP0, FARG2, FARG1 + |.else // min + | flt.d TMP0, FARG1, FARG2 + |.endif + | bnez TMP0, >8 // skip swap + | fmv.d FARG1, FARG2 + |8: + | addi RA, RA, 8 + | j <5 + |7: // Convert integer to number and continue above. + | checkint CARG2, >2 + | fcvt.d.w FARG2, CARG2 + | j <6 + |2: // Fallback: ->fff_fallback expects RB = CFUNC, but RB was reused above. Reload it from the frame. + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | j ->fff_fallback + |.endmacro + | + | math_minmax math_min, 0 + | math_minmax math_max, 1 + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 + | addi TMP0, TMP0, -LJ_TSTR + | or TMP1, TMP1, TMP0 + | cleartp STR:CARG1 + | bnez TMP1, ->fff_fallback // Need exactly 1 string argument. + | lw TMP0, STR:CARG1->len + | ld PC, FRAME_PC(BASE) + | snez RD, TMP0 + | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). + | addiw RD, RD, 1 + | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp_b TMP2, TISNUM + | sd TMP2, -16(BASE) + | j ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument. + | addi TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP2, 255 + | sext.w CARG1, CARG1 + | or TMP1, TMP1, TMP0 + | sltu TMP2, TMP2, CARG1 // !(255 < n). + | or TMP1, TMP1, TMP2 + | li CARG3, 1 + | bnez TMP1, ->fff_fallback + | addi CARG2, sp, TMPD_OFS + | sb CARG1, TMPD(sp) + |->fff_newstr: + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, const char *str, size_t l) + | call_intern fff_newstr, lj_str_new + | // Returns GCstr *.
+ | ld BASE, L->base + |->fff_resstr: + | li TMP1, LJ_TSTR + | settp CRET1, TMP1 + | j ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | ld CARG3, 16(BASE) + | addi TMP0, NARGS8:RC, -16 + | gettp TMP1, CARG1 + | bltz TMP0, ->fff_fallback + | cleartp STR:CARG1, CARG1 + | li CARG4, -1 // Default end = -1, kept when exactly 2 arguments are given. + | beqz TMP0, >1 + | sext.w CARG4, CARG3 + | checkint CARG3, ->fff_fallback + |1: + | checkint CARG2, ->fff_fallback + | addi TMP0, TMP1, -LJ_TSTR + | sext.w CARG3, CARG2 + | bnez TMP0, ->fff_fallback + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | addiw TMP0, CARG2, 1 + | bgez CARG4, >2 + | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1 + |2: + | bgez CARG3, >3 + | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1 + |3: + | bgez CARG4, >4 + | mv CARG4, x0 // if (end < 0) end = 0 + |4: + | bgtz CARG3, >5 + | li CARG3, 1 // if (start < 1) start = 1 + |5: + | ble CARG4, CARG2, >6 + | mv CARG4, CARG2 // if (end > len) end = len + |6: + | add CARG2, STR:CARG1, CARG3 + | sub CARG3, CARG4, CARG3 // len = end - start + | addi CARG2, CARG2, sizeof(GCstr)-1 // CARG2 = (char *)str + sizeof(GCstr) + start - 1. + | addiw CARG3, CARG3, 1 // len += 1 + | bgez CARG3, ->fff_newstr + |->fff_emptystr: // Return empty string. + | li TMP1, LJ_TSTR + | addi STR:CARG1, GL, offsetof(global_State, strempty) + | settp CARG1, TMP1 + | j ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ld CARG2, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + | checkstr STR:CARG2, ->fff_fallback + | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w // Reset the write pointer to the buffer start (w = b). + | sd PC, SAVE_PC(sp) + | call_intern ff_string_ .. name, lj_buf_putstr_ .. name + | call_intern ff_string_ ..
name, lj_buf_tostr // CARG1 = CRET1 + | ld BASE, L->base + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | fld FARG1, 0(BASE) + | beqz TMP1, ->fff_fallback // Callers set TMP1 = (tag < LJ_TISNUM); 0 means the fallback is needed. + | fadd.d FARG1, FARG1, TOBIT + | fmv.x.w CRET1, FARG1 + | zext.w CRET1, CRET1 + | ret + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | zext.w CRET1, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |1: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | addi TMP2, BASE, 8 + | add TMP3, BASE, NARGS8:RC + |1: + | ld TMP1, 0(TMP2) + | beq TMP2, TMP3, ->fff_resi // All arguments consumed. + | gettp TMP0, TMP1 + | addi TMP2, TMP2, 8 + | bne TMP0, TISNUM, >2 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |2: + | fld FARG1, -8(TMP2) + | sltiu TMP0, TMP0, LJ_TISNUM + | fadd.d FARG1, FARG1, TOBIT + | beqz TMP0, ->fff_fallback + | fmv.x.w TMP1, FARG1 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | srliw CARG2, CARG1, 8 + | lui CARG3, 16 + | addiw CARG3, CARG3, -256 // CARG3 = 0x10000 - 0x100 = 0xff00. + | and CARG2, CARG2, CARG3 + | srliw CARG3, CARG1, 24 + | or CARG2, CARG2, CARG3 + | slli CARG3, CARG1, 8 + | lui CARG4, 0x00ff0 // CARG4 = 0x00ff0000. + | and CARG3, CARG3, CARG4 + | slli CARG1, CARG1, 24 + | or CARG1, CARG1, CARG3 + | or CARG1, CARG1, CARG2 + | slli CARG1, CARG1, 32 + | srli CARG1, CARG1, 32 + | j ->fff_resi + | + |.ffunc_bit tobit + |->fff_resi: + | settp CARG1, TISNUM // CARG1 = CRET1 + | j ->fff_restv + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | zext.w CRET1, CRET1 + | j ->fff_resi + | + |.macro .ffunc_bit_sh, name, shins + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1:
+ | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bne TMP0, TISNUM, ->fff_fallback // Shift count must be an integer. + | sext.w CARG1, CARG1 + | shins CRET1, CARG1, CARG2 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_sh lshift, sllw + |.ffunc_bit_sh rshift, srlw + |.ffunc_bit_sh arshift, sraw + | + |.macro .ffunc_bit_rot, name, rotinsa, rotinsb + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | neg TMP2, CARG2 // Negated count, used by rotinsb for the other half of the rotate. + | rotinsa TMP1, CARG1, CARG2 + | rotinsb TMP0, CARG1, TMP2 + | or CRET1, TMP0, TMP1 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_rot rol, sllw, srlw + |.ffunc_bit_rot ror, srlw, sllw + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | ld CARG3, CFUNC:RB->f + | add TMP1, BASE, NARGS8:RC + | sd BASE, L->base + | addi TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | sd TMP1, L->top + | mv CARG1, L + | bltu TMP2, TMP0, >5 // Need to grow stack. + | jalr CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | slliw RD, CRET1, 3 // RD = (nresults+1)*8, as expected by ->fff_res. + | bgtz CRET1, ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | sub NARGS8:RC, TMP0, BASE + | cleartp LFUNC:RB + | bnez CRET1, ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | andi TMP1, PC, ~FRAME_TYPEP // TODO + | bnez TMP0, >3 + | lbu TMP1, OFS_RA(PC) + | slliw TMP1, TMP1, 3 + | addiw TMP1, TMP1, 16 + |3: + | sub TMP2, BASE, TMP1 + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | li CARG2, LUA_MINSTACK + | mv CARG1, L + | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | mv CRET1, x0 // CRET1 = 0 selects the retry path at 1: in ->fff_fallback. + | j <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | mv MULTRES, ra // Keep the return address across the C call. + | add TMP0, BASE, NARGS8:RC // Calculate L->top. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | mv CARG1, L + | sd TMP0, L->top + | call_intern fff_gc_step, lj_gc_step // (lua_State *L) + | ld BASE, L->base + | mv ra, MULTRES // Help return address predictor. + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | sub NARGS8:RC, TMP0, BASE + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | lbu TMP3, GL->hookmask + | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent. + | bnez TMP1, >5 + | // Decrement the hookcount for consistency, but always do the call. + | lw TMP2, GL->hookcount + | andi TMP1, TMP3, HOOK_ACTIVE + | bnez TMP1, >1 + | addiw TMP2, TMP2, -1 + | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz TMP1, >1 + | sw TMP2, GL->hookcount + | j >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, GL->hookmask + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | beqz TMP1, >1 + |5: // Re-dispatch to static ins. + | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH plus the scaled opcode (cf. decode_OP8 at 4: in ->vm_inshook).
+ | jr TMP1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, GL->hookmask + | lw TMP2, GL->hookcount + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | bnez TMP1, <5 + | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | addiw TMP2, TMP2, -1 + | beqz TMP1, <5 + | sw TMP2, GL->hookcount + | beqz TMP2, >1 // Count reached zero: call the hook dispatcher. + | andi TMP1, TMP3, LUA_MASKLINE + | beqz TMP1, <5 + |1: + | sw MULTRES, TMPD(sp) + | mv CARG2, PC + | sd BASE, L->base + | mv CARG1, L + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld TMP1, GG_DISP2STATIC(TMP0) + | decode_RA8 RA, INS + | decode_RD8b RD + | jr TMP1 + | + |->cont_hook: // Continue from hook yield. + | addi PC, PC, 4 + | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. + | j <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | addi CARG1, GL, GG_G2J + | cleartp LFUNC:TMP1 + | sd PC, SAVE_PC(sp) + | ld TMP1, LFUNC:TMP1->pc + | mv CARG2, PC + | sd L, (offsetof(jit_State, L))(CARG1) + | lbu TMP1, PC2PROTO(framesize)(TMP1) + | sd BASE, L->base + | slli TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP1, L->top // L->top = BASE + framesize*8. + | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc) + | j <3 + |.endif + | + | + |->vm_callhook: // Dispatch target for call hooks. + | mv CARG2, PC + |.if JIT + | j >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | ori CARG2, PC, 1 // Passes pc|1 here vs. plain pc from ->vm_callhook (presumably a hot-call marker; confirm in lj_dispatch_call). + |1: + |.endif + | add TMP0, BASE, RC + | sd PC, SAVE_PC(sp) + | sd BASE, L->base + | sub RA, RA, BASE // Keep RA relative to BASE; it is rebased after the call. + | sd TMP0, L->top + | mv CARG1, L + | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction.
+ | ld BASE, L->base + | ld TMP0, L->top + | sd x0, SAVE_PC(sp) // Invalidate for subsequent line hook. + | add RA, BASE, RA + | sub NARGS8:RC, TMP0, BASE + | ld LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | lw INS, -4(PC) + | jr CRET1 + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | ld TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA8 RC, INS + | addi TMP1, MULTRES, -8 + | cleartp TRACE:TMP2 + | add RC, BASE, RC // Call base. + | beqz TMP1, >2 + |1: // Move results down. + | ld CARG1, 0(RA) + | addi TMP1, TMP1, -8 + | addi RA, RA, 8 + | sd CARG1, 0(RC) + | addi RC, RC, 8 + | bnez TMP1, <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + | add RA, BASE, RA + |3: + | bltu RC, RA, >8 // More results wanted? + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | bxeq RD, TMP3, ->cont_nop // Blacklisted. + | slliw RD, RD, 3 + | bxnez RD, =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | addi CARG1, GL, GG_G2J + | // addi CARG2, CARG1, 1 // We don't care what's on the verge. + | addi CARG2, CARG1, 2047 // jit_State too large. + | sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2) + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sd BASE, L->base + | mv CARG2, PC + | // (jit_State *J, const BCIns *pc) + | call_intern cont_stitch, lj_dispatch_stitch + | ld BASE, L->base + | j ->cont_nop + | + |8: + | sd TISNIL, 0(RC) + | addi RC, RC, 8 + | j <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mv CARG1, L + | mv CARG2, PC + | sd BASE, L->base + | sw MULTRES, TMPD(sp) + | // (lua_State *L, const BCIns *pc) + | call_intern vm_profhook, lj_dispatch_profile + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
+ | addi PC, PC, -4 + | ld BASE, L->base + | j ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + | fsd f..a, a*8(sp) + | fsd f..b, b*8(sp) + | sd x..a, 32*8+a*8(sp) + | sd x..b, 32*8+b*8(sp) + |.endmacro + | + |->vm_exit_handler: + |.if JIT + | addi sp, sp, -(32*8+32*8) + | savex_ 0, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 26, 27 + | savex_ 28, 29 + | savex_ 30, 31 + | fsd f1, 1*8(sp) + | fsd f2, 2*8(sp) + | fsd f3, 3*8(sp) + | fsd f4, 4*8(sp) + | sd x0, 32*8+1*8(sp) // Clear RID_TMP. + | ld TMP1, 32*8+32*8(sp) // Load exit pc. + | addi TMP2, sp, 32*8+32*8 // Recompute original value of sp. + | addxi DISPATCH, GL, GG_G2DISP + | sd TMP2, 32*8+2*8(sp) // Store sp in RID_SP + | addi CARG1, GL, GG_G2J + | li_vmstate EXIT + | // addi CARG2, CARG1, 1 // We don't care what's on the verge. + | addi CARG2, CARG1, 2047 // jit_State too large. + | sub TMP1, TMP1, ra + | lw TMP2, 0(ra) // Load trace number. + | st_vmstate + | srli TMP1, TMP1, 2 + | ld L, GL->cur_L + | ld BASE, GL->jit_base + | srli TMP2, TMP2, 12 + | addi TMP1, TMP1, -2 + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2) // Store trace number. + | sd BASE, L->base + | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number. + | sd x0, GL->jit_base + | mv CARG2, sp + | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | ld BASE, L->base + | andi sp, TMP1, CFRAME_RAWMASK + | ld PC, SAVE_PC(sp) // Get SAVE_PC. 
+ | sd L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). + | j >1 + |.endif + | + |->vm_exit_interp: + |.if JIT + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ld L, SAVE_L(sp) + | addxi DISPATCH, GL, GG_G2DISP + | sd BASE, L->base + |1: + | ld LFUNC:RB, FRAME_FUNC(BASE) + | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | slli MULTRES, CRET1, 3 + | cleartp LFUNC:RB + | sw MULTRES, TMPD(sp) + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | slli TMP3, TMP3, 32 + | ld TMP1, LFUNC:RB->pc + | sd x0, GL->jit_base + | ld KBASE, PC2PROTO(k)(TMP1) + | fmv.d.x TOBIT, TMP3 + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) + | addi PC, PC, 4 + | addiw CRET1, CRET1, 17 // Static dispatch? + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, GL->vmstate + | decode_RD8a RD, INS + | beqz CRET1, >5 + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | sltiu TMP2, TMP1, BC_FUNCF*8 + | ld TMP3, 0(TMP0) + | decode_RA8 RA, INS + | beqz TMP2, >2 + | decode_RD8b RD + | jr TMP3 + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? + | ld TMP1, FRAME_PC(BASE) + | bnez TMP2, >3 + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8 TMP0, TMP2 + | sub TMP1, BASE, TMP0 + | ld LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ld TMP1, LFUNC:TMP2->pc + | ld KBASE, PC2PROTO(k)(TMP1) + |3: + | addi RC, MULTRES, -8 + | add RA, RA, BASE + | jr TMP3 + | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. 
+ | ld TMP0, GL_J(trace)(GL) + | decode_RD8b RD + | add TMP0, TMP0, RD + | ld TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld TMP3, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | decode_RA8b RA + | jr TMP3 + | + |9: // Rethrow error from the right C frame. + | negw CARG2, CRET1 + | mv CARG1, L + | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + | + |// Hard-float round to integer. + |// Modifies TMP0, FARG1, FARG5 + |.macro vm_round, rm + | fmv.x.d TMP0, FARG1 + | srli TMP0, TMP0, 52 // Extract exponent (and sign). + | andi TMP0, TMP0, 0x7ff // Extract exponent. + | addi TMP0, TMP0, -1075 + | bgtz TMP0, >1 // Less than 2^52 / Not NaN? 
+ | fcvt.l.d TMP0, FARG1, rm + | fcvt.d.l FARG5, TMP0 + | fsgnj.d FRET1, FARG5, FARG1 + |1: + | ret + |.endmacro + | + | + |->vm_floor: + | vm_round rdn + |->vm_ceil: + | vm_round rup + |->vm_trunc: + |.if JIT + | vm_round rtz + |.endif + | + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |// void lj_vm_fence_rw_rw() + |->vm_fence_rw_rw: + |.if JIT or FFI + | .long 0x0330000f + | ret + |.endif + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP2, TMP2 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | lw NEXT_ASIZE, NEXT_TAB->asize + | ld NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | bgeu NEXT_IDX, NEXT_ASIZE, >5 + | slliw NEXT_TMP1, NEXT_IDX, 3 + | add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | li TMP3, LJ_TISNUM + | ld NEXT_TMP2, 0(NEXT_TMP1) + | slli TMP3, TMP3, 47 + | or NEXT_TMP1, NEXT_IDX, TMP3 + | addiw NEXT_IDX, NEXT_IDX, 1 + | beq NEXT_TMP2, NEXT_NIL, <1 + | sd NEXT_TMP2, NEXT_RES_VAL + | sd NEXT_TMP1, NEXT_RES_KEY + | mv NEXT_RES_VK, NEXT_RES_PTR + | mv NEXT_RES_IDX, NEXT_IDX + | ret + | + |5: // Traverse hash part. 
+ | subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | lw NEXT_TMP0, NEXT_TAB->hmask + | ld NODE:NEXT_RES_VK, NEXT_TAB->node + | slliw NEXT_TMP2, NEXT_RES_IDX, 5 + | slliw TMP3, NEXT_RES_IDX, 3 + | subw TMP3, NEXT_TMP2, TMP3 + | add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3 + |6: + | bltu NEXT_TMP0, NEXT_RES_IDX, >8 + | ld NEXT_TMP2, NODE:NEXT_RES_VK->val + | addiw NEXT_RES_IDX, NEXT_RES_IDX, 1 + | bne NEXT_TMP2, NEXT_NIL, >9 + | // Skip holes in hash part. + | addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | j <6 + | + |8: // End of iteration. Set the key to nil (not the value). + | sd NEXT_NIL, NEXT_RES_KEY + | mv NEXT_RES_VK, NEXT_RES_PTR + |9: + | addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + | ret + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in x5, g in x7. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:x7->ctype_state + | mv GL, x7 + | addxi DISPATCH, x7, GG_G2DISP + | srli x5, x5, 12 + | sw x5, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | fsd FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | fsd FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | fsd FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | fsd FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | fsd FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | fsd FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | fsd FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | fsd FARG8, CTSTATE->cb.fpr[7] + | addi TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. 
+ | mv CARG1, CTSTATE + | mv CARG2, sp + | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | mv L, CRET1 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | slli TMP3, TMP3, 32 + | li_vmstate INTERP + | subw RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | fmv.d.x TOBIT, TMP3 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ld CTSTATE, GL->ctype_state + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | mv CARG1, CTSTATE + | mv CARG2, RA + | // (CTState *cts, TValue *o) + | call_intern cont_ffi_callback, lj_ccallback_leave + | fld FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | fld FRET2, CTSTATE->cb.fpr[1] + | ld CRET2, CTSTATE->cb.gpr[1] + | j ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. 
+ |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | lbu CARG3, CCSTATE->nfpr + | mv TMP2, sp + | sub sp, sp, TMP1 + | sd ra, -8(TMP2) + | sd x18, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | mv x18, TMP2 + | addi TMP1, CCSTATE, offsetof(CCallState, stack) + | mv TMP2, sp + | add TMP3, TMP1, CARG2 + | beqz CARG2, >2 + |1: + | ld TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | sd TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | bltu TMP1, TMP3, <1 + |2: + | beqz CARG3, >3 + | fld FARG1, CCSTATE->fpr[0] + | fld FARG2, CCSTATE->fpr[1] + | fld FARG3, CCSTATE->fpr[2] + | fld FARG4, CCSTATE->fpr[3] + | fld FARG5, CCSTATE->fpr[4] + | fld FARG6, CCSTATE->fpr[5] + | fld FARG7, CCSTATE->fpr[6] + | fld FARG8, CCSTATE->fpr[7] + |3: + | ld CFUNCADDR, CCSTATE->func + | ld CARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | jalr CFUNCADDR + | ld CCSTATE:TMP1, -24(x18) + | ld TMP0, -16(x18) + | ld ra, -8(x18) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + | fsd FRET1, CCSTATE:TMP1->fpr[0] + | fsd FRET2, CCSTATE:TMP1->fpr[1] + | mv sp, x18 + | mv x18, TMP0 + | ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + if (op == BC_ISLT || op == BC_ISGE) { + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + } else { + | ld CARG2, 0(RA) + | ld CARG1, 0(RD) + | gettp CARG3, CARG2 + | gettp CARG4, CARG1 + } + | lhu TMP2, OFS_RD(PC) // TMP2=jump + | addi PC, PC, 4 + | bne CARG3, TISNUM, >2 + | decode_BC4b TMP2 + | bne CARG4, TISNUM, >5 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | slt TMP1, CARG1, CARG2 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (op == BC_ISLT || op == BC_ISGT) { + | neg TMP1, TMP1 + } else { + | addi TMP1, TMP1, -1 + } + | and TMP2, TMP2, TMP1 + |1: + | add PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu TMP1, CARG3, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | sltiu TMP1, CARG4, LJ_TISNUM + | decode_BC4b TMP2 + | beqz TMP1, >4 + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + |3: // RA and RD are both numbers. + | addw TMP2, TMP2, TMP3 + if (op == BC_ISLT) { + | flt.d TMP3, FTMP0, FTMP2 + | neg TMP3, TMP3 + } else if (op == BC_ISGE) { + | flt.d TMP3, FTMP0, FTMP2 + | addi TMP3, TMP3, -1 + } else if (op == BC_ISLE) { + | fle.d TMP3, FTMP2, FTMP0 + | neg TMP3, TMP3 + } else if (op == BC_ISGT) { + | fle.d TMP3, FTMP2, FTMP0 + | addi TMP3, TMP3, -1 + } + | and TMP2, TMP2, TMP3 + | j <1 + | + |4: // RA is a number, RD is not a number. + | // RA is a number, RD is an integer. Convert RD to a number. 
+ | bxne CARG4, TISNUM, ->vmeta_comp + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } else { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } + | j <3 + | + |5: // RA is an integer, RD is not an integer + | sltiu TMP1, CARG4, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } else { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } + | j <3 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, -4+OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu TMP0, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or TMP0, TMP0, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + | beqz TMP0, ->BC_ISEQN_Z + } else { + | beqz TMP0, ->BC_ISNEN_Z + } + |// Either or both types are not numbers. + |.if FFI + | // Check if RA or RD is a cdata. + | xori TMP0, CARG3, LJ_TCDATA + | xori TMP1, CARG4, LJ_TCDATA + | and TMP0, TMP0, TMP1 + | bxeqz TMP0, ->vmeta_equal_cd + |.endif + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b TMP2 + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + | bne CARG1, CARG2, >2 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | add PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor TMP3, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? 
TMP0=1 + | beqz TMP3, >3 + | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata + |3: + | cleartp TAB:TMP1, CARG1 + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? + } + | j ->vmeta_equal // Handle __eq metamethod. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | add RA, BASE, RA + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | sub RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp CARG3, CARG1 + | li TMP1, LJ_TCDATA + |.endif + | li TMP0, LJ_TSTR + | decode_BC4b TMP2 + | settp CARG2, TMP0 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + |.if FFI + | bxeq CARG3, TMP1, ->vmeta_equal_cd + |.endif + | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D + | addw TMP2, TMP2, TMP3 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | add RA, BASE, RA + | add RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | addi PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | decode_BC4b TMP2 + | bne CARG3, TISNUM, >4 + | addw TMP2, TMP2, TMP3 + | bne CARG4, TISNUM, >6 + | xor TMP0, CARG1, CARG2 // 
TMP0=0: A==D; TMP0!=0: A!=D + |1: + if (vk) { + | seqz TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + |2: + } else { + | snez TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + |2: + | add PC, PC, TMP2 + } + |3: + | ins_next + | + |4: // RA is not an integer. + | addw TMP2, TMP2, TMP3 + |.if FFI + | bgeu CARG3, TISNUM, >7 + |.else + | bgeu CARG3, TISNUM, <2 + |.endif + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | bne CARG4, TISNUM, >5 + |// RA is a number, RD is an integer. + | fcvt.d.w FTMP2, CARG2 + | + |5: // RA and RD are both numbers. + | feq.d TMP0, FTMP0, FTMP2 + | seqz TMP0, TMP0 + | j <1 + | + |6: // RA is an integer, RD is a number. + |.if FFI + | bgeu CARG4, TISNUM, >8 + |.else + | bgeu CARG4, TISNUM, <2 + |.endif + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | j <5 + | + |.if FFI + |7: // RA not int, not number + | li TMP0, LJ_TCDATA + | bne CARG3, TMP0, <2 + | j ->vmeta_equal_cd + | + |8: // RD not int, not number + | li TMP0, LJ_TCDATA + | bne CARG4, TMP0, <2 + | j ->vmeta_equal_cd + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | add RA, BASE, RA + | srliw TMP0, RD, 3 + | ld TMP1, 0(RA) + | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 + | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target + | gettp TMP1, TMP1 + | addi PC, PC, 4 + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + |.if FFI + | li TMP3, LJ_TCDATA + | bxeq TMP1, TMP3, ->vmeta_equal_cd + |.endif + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, 
RD = src*8, JMP with RD = target + | add RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | addi PC, PC, 4 + | gettp TMP0, TMP0 + | add RA, BASE, RA + | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | ld CRET1, 0(RD) + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + if (op == BC_IST || op == BC_ISTC) { + | beqz TMP0, >1 + if (op == BC_ISTC) { + | sd CRET1, 0(RA) + } + } else { + | bnez TMP0, >1 + if (op == BC_ISFC) { + | sd CRET1, 0(RA) + } + } + | add PC, PC, TMP2 + |1: + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | add TMP0, BASE, RA + | srliw TMP1, RD, 3 + | ld TMP0, 0(TMP0) + | gettp TMP0, TMP0 + | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 + | bxnez TMP0, ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | add TMP0, BASE, RA + | ld TMP0, 0(TMP0) + | checknum TMP0, ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | li TMP1, LJ_TTRUE + | ins_next1 + | gettp TMP0, TMP0 + | sltu TMP0, TMP1, TMP0 + | addiw TMP0, TMP0, 1 + | slli TMP0, TMP0, 47 + | not TMP0, TMP0 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | add RB, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RB) + | lui TMP1, 0x80000 + | gettp CARG3, TMP0 + | bne CARG3, TISNUM, >1 + | negw TMP0, TMP0 + | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. 
+ | zext.w TMP0, TMP0 + | settp_b TMP0, TISNUM + | j >2 + |1: + | sltiu TMP3, CARG3, LJ_TISNUM + | slli TMP1, TMP1, 32 + | bxeqz TMP3, ->vmeta_unm + | xor TMP0, TMP0, TMP1 // sign => ~sign + |2: + | sd TMP0, 0(RA) + | ins_next + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | add CARG2, BASE, RD + | ld TMP0, 0(CARG2) + | add RA, BASE, RA + | gettp TMP1, TMP0 + | addi TMP2, TMP1, -LJ_TSTR + | cleartp STR:CARG1, TMP0 + | bnez TMP2, >2 + | lwu CARG1, STR:CARG1->len + |1: + | settp_b CARG1, TISNUM + | sd CARG1, 0(RA) + | ins_next + |2: + | addi TMP2, TMP1, -LJ_TTAB + | bxnez TMP2, ->vmeta_len +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |3: +#endif + |->BC_LEN_Z: + | call_intern BC_LEN, lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | j <1 +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | fdiv.d FARG1, b, c + | jal ->vm_floor // floor(b/c) + | fmul.d a, FRET1, c + | fsub.d a, b, a // b - floor(b/c)*c + |.endmacro + | + |.macro ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||if (vk == 1) { + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | decode_RB8 RC, INS + | decode_RDtoRC8 RB, RD + ||} else { + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + ||} + ||switch (vk) { + ||case 0: // suffix is VN + | add RB, BASE, RB + | add RC, KBASE, RC + || break; + ||case 1: // suffix is NV + | add RC, BASE, RC + | add RB, KBASE, RB + || break; + ||default: // CAT or suffix is VV + | add RB, BASE, RB + | add RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arithfp, fpins, itype1, itype2 + | fld FTMP0, 0(RB) + | sltu itype1, itype1, TISNUM + | sltu itype2, itype2, TISNUM + | fld FTMP2, 0(RC) + | and itype1, itype1, itype2 + | 
add RA, BASE, RA + | bxeqz itype1, ->vmeta_arith + | fpins FRET1, FTMP0, FTMP2 + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + |.endmacro + | + |.macro ins_arithead, itype1, itype2, tval1, tval2 + | ld tval1, 0(RB) + | ld tval2, 0(RC) + | // Check for two integers. + | gettp itype1, tval1 + | gettp itype2, tval2 + |.endmacro + | + |.macro ins_arithdn, intins, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG3, CARG1 + | sext.w CARG4, CARG2 + |.if "intins" == "addw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "subw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "mulw" + | mul TMP2, CARG3, CARG4 + | add RA, BASE, RA + | sext.w CRET1, TMP2 + | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. + |.endif + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithdiv, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithmod, fpins, BC + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | add RA, BASE, RA + | bxeqz CARG2, ->vmeta_arith + | call_intern BC, lj_vm_modi + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. 
+ | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn addw, fadd.d + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subw, fsub.d + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn mulw, fmul.d + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdiv.d + break; + case BC_MODVN: + | ins_arithmod fpmod, BC_MODVN + break; + case BC_MODNV: + | ins_arithmod fpmod, BC_MODNV + break; + case BC_MODVV: + | ins_arithmod fpmod, BC_MODVV + break; + case BC_POW: + | ins_arithpre + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | add RA, BASE, RA + | bxeqz TMP0, ->vmeta_arith + | fld FARG1, 0(RB) + | fld FARG2, 0(RC) + | call_extern BC_POW, pow + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | sub CARG3, RC, RB + | sd BASE, L->base + | add CARG2, BASE, RC + | mv MULTRES, RB + |->BC_CAT_Z: + | srliw CARG3, CARG3, 3 + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). 
+ | ld BASE, L->base + | bxnez CRET1, ->vmeta_binop + | add RB, BASE, MULTRES + | ld TMP0, 0(RB) + | add RA, BASE, RA + | sd TMP0, 0(RA) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | sub TMP1, KBASE, RD + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | sub TMP1, KBASE, RD + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sraiw RD, INS, 16 + | add RA, BASE, RA + | zext.w RD, RD + | ins_next1 + | settp_b RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | add RD, KBASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | add RA, BASE, RA + | slli TMP0, RD, 44 // 44+3 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | add RA, BASE, RA + | sd TISNIL, 0(RA) + | addi RA, RA, 8 + | add RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt TMP0, RA, RD + | addi RA, RA, 8 + | bnez TMP0, <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RA, BASE, RA + | cleartp LFUNC:TMP0 + | add RD, RD, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RD->uvptr + | ld TMP1, UPVAL:TMP0->v + | ld TMP2, 0(TMP1) + | ins_next1 + | sd TMP2, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, BASE, 
RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:TMP0->closed + | gettp TMP2, CRET1 + | sd CRET1, 0(CARG2) + | or TMP3, TMP3, TMP0 + | li TMP0, LJ_GC_BLACK|1 + | addi TMP2, TMP2, -(LJ_TNUMX+1) + | beq TMP3, TMP0, >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | cleartp GCOBJ:CRET1, CRET1 + | beqz TMP0, <1 // tvisgcv(v) + | lbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL + | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | lbu TMP3, STR:TMP1->marked + | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:TMP0->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | sd TMP1, 0(CARG2) + | bnez TMP4, >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. 
+ | mv CARG1, GL + | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, KBASE, RD + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, 0(RD) + | ld TMP0, UPVAL:TMP0->v + | sd TMP1, 0(TMP0) + | ins_next + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | slli TMP2, RD, 44 + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | not TMP2, TMP2 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, UPVAL:TMP0->v + | sd TMP2, 0(TMP1) + | ins_next + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | sd BASE, L->base + | mv CARG1, L + | beqz TMP2, >1 + | add CARG2, BASE, RA + | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | sub TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | cleartp CARG3 + | mv CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern BC_FNEW, lj_func_newL_gc + | // Returns GCfuncL *. 
+ | li TMP0, LJ_TFUNC + | ld BASE, L->base + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bgeu TMP0, TMP1, >5 + |1: + if (op == BC_TNEW) { + | srliw CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | lzi TMP0, 0x801 + | addiw TMP2, CARG2, -0x7ff + | srliw CARG3, RD, 14 + | seqz TMP3, TMP2 + | neg TMP4, TMP3 + | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 + | and CARG1, CARG1, TMP4 + | xor CARG2, CARG2, CARG1 + | mv CARG1, L + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern BC_TNEW, lj_tab_new + | // Returns Table *. + } else { + | sub TMP1, KBASE, RD + | mv CARG1, L + | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) + | // Returns Table *. 
+ } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + |5: + | mv MULTRES, RD + | mv CARG1, L + if (op == BC_TNEW) { + | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) + } else { + | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) + } + | mv RD, MULTRES + | j <1 + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP0 + | ld TAB:RB, LFUNC:TMP0->env + | add RA, BASE, RA + if (op == BC_GGET) { + | j ->BC_TGETS_Z + } else { + | j ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | lw TMP0, TAB:RB->asize + | bne TMP3, TISNUM, >5 // Integer key? + | sext.w TMP2, TMP2 + | ld TMP1, TAB:RB->array + | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? + | slliw TMP2, TMP2, 3 + | add TMP2, TMP1, TMP2 + | ld CRET1, 0(TMP2) + | beq CRET1, TISNIL, >2 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tgetv + | + |5: + | li TMP0, LJ_TSTR + | cleartp RC, TMP2 + | bxne TMP3, TMP0, ->vmeta_tgetv // String key? 
+ | j ->BC_TGETS_Z + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | ld TAB:TMP3, TAB:RB->metatable + | bne CARG1, RC, >4 + | beq CARG2, TISNIL, >5 // Key found, but nil value? + |3: + | sd CARG2, 0(RA) + | ins_next + | + |4: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | mv CARG2, TISNIL + | beqz TAB:TMP3, <3 // No metatable: done. + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<vmeta_tgets + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | add CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | srliw TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tgetb + | add RC, TMP2, RC + | ld CRET1, 0(RC) + | beq CRET1, TISNIL, >5 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! 
+ break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add RB, BASE, RB + | add RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, 0(RC) + | add RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? + | slliw TMP2, CARG2, 3 + | add TMP3, TMP1, TMP2 + | ld TMP1, 0(TMP3) + |->BC_TGETR_Z: + | ins_next1 + | sd TMP1, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tsetv + | sext.w RC, TMP2 + | checkint TMP2, >5 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? + | slliw TMP2, RC, 3 + | add TMP1, TMP1, TMP2 + | lbu TMP3, TAB:RB->marked + | ld TMP0, 0(TMP1) + | ld CRET1, 0(RA) + | beq TMP0, TISNIL, >3 + |1: + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(TMP1) + | bnez TMP2, >7 + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<vmeta_tsetv + |5: + | gettp TMP0, TMP2 + | addi TMP0, TMP0, -LJ_TSTR + | bxnez TMP0, ->vmeta_tsetv + | cleartp STR:RC, TMP2 + | j ->BC_TSETS_Z // String key? + | + |7: // Possible table write barrier for the value. Skip valiswhite check. 
+ | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | add RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | sb x0, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + | fld FTMP0, 0(RA) + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | lbu TMP3, TAB:RB->marked + | bne TMP0, RC, >5 + | ld TAB:TMP0, TAB:RB->metatable + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |2: + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table) + | fsd FTMP0, NODE:TMP2->val + | bnez TMP3, >7 + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<vmeta_tsets + | + |5: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | addi CARG3, GL, offsetof(global_State, tmptv) + | beqz TAB:TMP2, >6 // No metatable: continue. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. 
+ |6: + | sd RC, 0(CARG3) + | sd BASE, L->base + | mv CARG2, TAB:RB + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, GCtab *t, TValue *k) + | call_intern BC_TSETS, lj_tab_newkey + | // Returns TValue *. + | ld BASE, L->base + | fsd FTMP0, 0(CRET1) + | j <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add RA, BASE, RA + | ld TAB:RB, 0(CARG2) + | srliw TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tsetb + | add RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + | ld CRET1, 0(RA) + | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(RC) + | bnez TMP1, >7 + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG1, BASE, RB + | add CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, 0(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | add RA, BASE, RA + | bnez TMP2, >7 + |2: + | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? 
+ | slliw TMP2, CARG3, 3 + | add CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld TMP1, 0(RA) + | ins_next1 + | sd TMP1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | add RA, BASE, RA + |1: + | add TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiw TMP0, MULTRES, -8 + | lw TMP3, 0(TMP3) // Integer constant is in lo-word. + | srliw CARG3, TMP0, 3 + | beqz TMP0, >4 // Nothing to copy? + | cleartp TAB:CARG2 + | addw CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | slliw TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | bltu TMP2, CARG3, >5 + | add TMP2, RA, TMP0 + | add TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu RA, TMP2, <3 + | bnez TMP0, >7 + |4: + | ins_next + | + |5: // Need to resize array part. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv BASE, RD + | mv CARG1, L + | // (lua_State *L, GCtab *t, int nasize) + | call_intern BC_TSETM, lj_tab_reasize + | // Must not reallocate the stack. + | mv RD, BASE + | ld BASE, L->base // Reload BASE for lack of a saved register. + | j <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. 
+ | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | addw NARGS8:RC, NARGS8:RC, MULTRES + | j ->BC_CALL_Z + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | mv TMP2, BASE + | add BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | addi BASE, BASE, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addw NARGS8:RD, NARGS8:RD, MULTRES + | j ->BC_CALLT_Z1 + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT_Z1: + | add RA, BASE, RA + | ld LFUNC:RB, 0(RA) + | mv NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | addi RA, RA, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | xori TMP2, TMP1, FRAME_VARG + | bnez TMP0, >7 + |1: + | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? + | mv TMP2, BASE + | mv RB, CARG3 + | mv TMP3, NARGS8:RC + | beqz NARGS8:RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addiw TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez TMP3, <2 + |3: + | or TMP0, TMP0, CARG4 + | beqz TMP0, >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8 RA, INS + | sub TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | j <4 + | + |7: // Tailcall from a vararg function. 
+ | andi CARG4, TMP2, FRAME_TYPEP + | sub TMP2, BASE, TMP2 // Relocate BASE down. + | bnez CARG4, <1 // Vararg frame below? + | mv BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | andi TMP0, TMP1, FRAME_TYPE + | j <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | mv TMP2, BASE // Save old BASE for vmeta_call. + | add BASE, BASE, RA + | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | addi BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |.if JIT + | hotloop + |.endif + |->vm_IITERN: + | add RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8(RA) // Get index from control var. + | cleartp TAB:RB + | addi PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | slli CARG3, TISNUM, 47 + |1: // Traverse array part. + | bleu TMP0, RC, >5 // Index points after array part? + | slliw TMP3, RC, 3 + | add TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | or TMP2, RC, CARG3 + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b RD + | add RD, RD, TMP3 + | sw RC, -8(RA) // Update control var. + | add PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subw RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. 
+ | slliw TMP3, RC, 5 + | slliw RB, RC, 3 + | subw TMP3, TMP3, RB + | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | sd CARG1, 8(RA) + | addw RC, RC, TMP0 + | decode_BC4b RD + | addw RD, RD, TMP3 + | sd CARG2, 0(RA) + | add PC, PC, RD + | sw RC, -8(RA) // Update control var. + | j <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | add RA, BASE, RA + | srliw TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | add TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | addi CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | addi CARG3, CARG3, -LJ_TNIL + | or TMP3, CARG2, CARG3 + | addi TMP1, TMP1, -FF_next_N + | or TMP3, TMP3, TMP1 + | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff + | bnez TMP3, >5 + | add PC, TMP0, TMP2 + | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) + | slli TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | add PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li TMP3, BC_ITERN + | lhu TMP2, OFS_RD(PC) + | bne TMP0, TMP3, >6 + |.endif + | sb TMP1, OFS_OP(PC) + | j <1 + |.if JIT + |6: // Unpatch JLOOP. + | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. 
+ | slliw TMP2, TMP2, 3 + | add TMP0, TMP0, TMP2 + | ld TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | andi TMP0, TMP0, -256 + | or TMP0, TMP0, TMP1 + | sw TMP0, 0(PC) + | j <1 + |.endif + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8 RB, INS + | add RC, BASE, RC + | add RA, BASE, RA + | addi RC, RC, FRAME_VARG + | add TMP2, RA, RB + | addi TMP3, BASE, -16 // TMP3 = vtop + | sub RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | sub TMP1, TMP3, RC + | beqz RB, >5 // Copy all varargs? + | addi TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu TMP0, RC, TMP3 + | addi RC, RC, 8 + | bnez TMP0, >2 + | mv CARG1, TISNIL + |2: + | sd CARG1, 0(RA) + | sltu TMP0, RA, TMP2 + | addi RA, RA, 8 + | bnez TMP0, <1 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | li MULTRES, 8 // MULTRES = (0+1)*8 + | blez TMP1, <3 // No vararg slots? + | add TMP2, RA, TMP1 + | addi MULTRES, TMP1, 8 + | bltu TMP0, TMP2, >7 + |6: + | ld CRET1, 0(RC) + | addi RC, RC, 8 + | sd CRET1, 0(RA) + | addi RA, RA, 8 + | bltu RC, TMP3, <6 // More vararg slots? + | j <3 + | + |7: // Grow stack for varargs. + | sd RA, L->top + | sub RA, RA, BASE + | sd BASE, L->base + | sub BASE, RC, BASE // Need delta, because BASE may change. 
+ | sd PC, SAVE_PC(sp) + | srliw CARG2, TMP1, 3 + | mv CARG1, L + | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) + | mv RC, BASE + | ld BASE, L->base + | add RA, BASE, RA + | add RC, BASE, RC + | addi TMP3, BASE, -16 + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addw RD, RD, MULTRES + | j ->BC_RET_Z1 + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + |->BC_RET_Z1: + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + | addi RC, RD, -8 + | decode_RA8 TMP0, INS + | decode_RB8 RB, INS + | sub BASE, TMP2, TMP0 + | add TMP3, TMP2, RB + | beqz RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addi RC, RC, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez RC, <2 + |3: + | addi TMP3, TMP3, -8 + |5: + | bltu TMP2, TMP3, >6 + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | addi TMP2, TMP2, 8 + | j <5 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bnez TMP2, ->vm_return + | // Return from vararg function: relocate BASE down. 
+ | sub BASE, BASE, TMP1 + | ld PC, FRAME_PC(BASE) + | j <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8 RB, INS + | decode_RA8 RA, INS + | sub BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | bltu RD, RB, >6 + | ld TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ins_next1 + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | addi TMP2, TMP2, 8 + | addi RD, RD, 8 + if (op == BC_RET1) { + | sd TISNIL, 0(TMP2) + } else { + | sd TISNIL, -8(TMP2) + } + | j <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | add RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX + | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP + | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP + | gettp CARG4, CARG1 + | gettp CARG5, CARG2 + | gettp CARG6, CARG3 + if (op != BC_JFORL) { + | srliw RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 + | add TMP2, RD, TMP2 + } + | bne CARG4, TISNUM, >3 + | sext.w CARG4, CARG1 // start + | sext.w CARG3, CARG3 // stop + if (!vk) { // init + | bxne CARG6, TISNUM, ->vmeta_for + | bxne CARG5, TISNUM, ->vmeta_for + | bfextri TMP0, CARG2, 31, 31 // sign + | slt CARG2, CARG3, CARG4 + | slt TMP1, CARG4, CARG3 + | neg TMP4, TMP0 + | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? 
TMP1 : CARG2 + | and TMP0, TMP0, TMP4 + | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop + } else { + | sext.w CARG5, CARG2 // step + | addw CARG1, CARG4, CARG5 // start + step + | xor TMP3, CARG1, CARG4 // y^a + | xor TMP1, CARG1, CARG5 // y^b + | and TMP3, TMP3, TMP1 + | slt TMP1, CARG1, CARG3 // start+step < stop ? + | slt CARG3, CARG3, CARG1 // stop < start+step ? + | sltz TMP0, CARG5 // step < 0 ? + | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. + | neg TMP4, TMP0 + | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? TMP1 : CARG3 + | and TMP1, TMP1, TMP4 + | xor CARG3, CARG3, TMP1 + | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | sd CARG1, FORL_IDX*8(RA) + } + |1: + if (op == BC_FORI) { + | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } else if (op == BC_JFORI) { + | add PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | decode_RD8b RD + | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop + } else if (op == BC_JFORL) { + | beqz CARG2, =>BC_JLOOP + } + | ins_next2 + | + |3: // FP loop. + | fld FTMP0, FORL_IDX*8(RA) // start + | fld FTMP1, FORL_STOP*8(RA) // stop + | ld TMP0, FORL_STEP*8(RA) // step + | sltz CARG2, TMP0 // step < 0 ? + | neg CARG2, CARG2 + if (!vk) { + | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? + | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? + | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? + | and TMP3, TMP3, TMP1 + | and TMP0, TMP0, TMP3 + | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number + | flt.d TMP3, FTMP0, FTMP1 // start < stop ? + | flt.d TMP4, FTMP1, FTMP0 // stop < start ? + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? 
TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop + | j <1 + } else { + | fld FTMP3, FORL_STEP*8(RA) + | fadd.d FTMP0, FTMP0, FTMP3 // start + step + | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? + | flt.d TMP4, FTMP1, FTMP0 + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 + if (op == BC_IFORL) { + | addi TMP3, CARG2, -1 + | and TMP2, TMP2, TMP3 + | add PC, PC, TMP2 + } + | fsd FTMP0, FORL_IDX*8(RA) + | ins_next1 + | fsd FTMP0, FORL_EXT*8(RA) + | j <2 + } + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | add RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | sd TMP1,-8(RA) + | j =>BC_JLOOP + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // RA = base*8, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | // RA = base*8 (ignored), RD = traceno*8 + | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. + | add TMP0, TMP0, RD + | // Traces on RISC-V don't store the trace number, so use 0. 
+ | sd x0, GL->vmstate + | ld TRACE:TMP1, 0(TMP0) + | sd BASE, GL->jit_base // store Current JIT code L->base + | ld TMP1, TRACE:TMP1->mcode + | sd L, GL->tmpbuf.L + | jr TMP1 + |.endif + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD // PC + (jump - 0x8000)<<2 + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | bltu TMP2, RA, ->vm_growstack_l + | slliw TMP1, TMP1, 3 // numparams*8 + |2: + | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. + if (op == BC_JFUNCF) { + | decode_RD8 RD, INS + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | add TMP0, BASE, NARGS8:RC + | sd TISNIL, 0(TMP0) + | addiw NARGS8:RC, NARGS8:RC, 8 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC + | add TMP1, BASE, RC + | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 + | add TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | addi TMP2, TMP2, -8 + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. 
+ | bgeu TMP0, TMP2, ->vm_growstack_l + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | mv RA, BASE + | mv RC, TMP1 + | ins_next1 + | addi BASE, TMP1, 16 + | beqz TMP2, >2 + |1: + | ld TMP0, 0(RA) + | sltu CARG2, RA, RC // Less args than parameters? + | addi RA, RA, 8 + | addi TMP1, TMP1, 8 + | addiw TMP2, TMP2, -1 + | beqz CARG2, >3 + | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). + | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 + | and TMP3, TMP3, TMP4 + | xor CARG1, TMP0, TMP3 + | sd CARG1, -8(RA) + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + |2: + | ins_next2 + |3: + | neg TMP4, CARG2 // Clear missing fixargs. + | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL + | and TMP3, TMP3, TMP4 + | xor TMP0, TISNIL, TMP3 + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CARG4, CFUNC:RB->f + } else { + | ld CARG4, GL->wrapf + } + | add TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | add RC, BASE, NARGS8:RC + | sd BASE, L->base // base of currently excuting function + | sd RC, L->top + | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. + | li_vmstate C // li TMP0, ~LJ_VMST_C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | mv CARG1, L + | st_vmstate // sw TMP0, GL->vmstate + | jalr CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. + | ld BASE, L->base + | ld TMP1, L->top + | sd L, GL->cur_L + | slliw RD, CRET1, 3 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. 
+  |  sub RA, TMP1, RD  // RA = L->top - nresults*8
+  |  st_vmstate
+  |  j ->vm_returnc
+    break;
+
+  /* ---------------------------------------------------------------------- */
+
+  default:
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+}
+
+/*
+** Build the whole backend: grow the PC label table to BC__MAX entries,
+** emit the subroutines, then emit one code sequence per bytecode opcode
+** into the .code_op section. Returns BC__MAX.
+*/
+static int build_backend(BuildCtx *ctx)
+{
+  int op;
+
+  dasm_growpc(Dst, BC__MAX);
+
+  build_subroutines(ctx);
+
+  |.code_op
+  for (op = 0; op < BC__MAX; op++)
+    build_ins(ctx, (BCOp)op, op);
+
+  return BC__MAX;
+}
+
+/*
+** Emit pseudo frame-info for all assembler functions.
+**
+** Only the BUILD_elfasm mode produces output; every other mode is a no-op.
+** The text written to ctx->fp consists of:
+**   - a .debug_frame section with one CIE (.Lframe0), an FDE covering
+**     .Lbegin up to lj_vm_ffi_call (fcofs bytes) and, if LJ_HASFFI, an FDE
+**     covering lj_vm_ffi_call up to the end of the code;
+**   - unless LJ_NO_UNWIND is set, an .eh_frame section with the equivalent
+**     CIEs/FDEs, using pc-relative 4-byte pointers (encoding 0x1b).
+**
+** In .debug_frame the FDE initial_location/address_range fields are
+** target-address-sized, so they are emitted as .8byte. In .eh_frame they
+** follow the 0x1b (pcrel|sdata4) encoding and are emitted as .4byte.
+*/
+static void emit_asm_debug(BuildCtx *ctx)
+{
+  /* Byte offset of lj_vm_ffi_call from the start of the generated code. */
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+  int i;
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+    fprintf(ctx->fp,
+      ".Lframe0:\n"
+      "\t.4byte .LECIE0-.LSCIE0\n"
+      ".LSCIE0:\n"
+      "\t.4byte 0xffffffff\n"
+      "\t.byte 0x1\n"
+      "\t.string \"\"\n"
+      "\t.uleb128 0x1\n"
+      "\t.sleb128 -4\n"
+      "\t.byte 1\n"  /* Return address is in ra. */
+      "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"  /* def_cfa sp 0 */
+      "\t.align 3\n"
+      ".LECIE0:\n\n");
+    fprintf(ctx->fp,
+      ".LSFDE0:\n"
+      "\t.4byte .LEFDE0-.LASFDE0\n"
+      ".LASFDE0:\n"
+      "\t.4byte .Lframe0\n"
+      "\t.8byte .Lbegin\n"
+      "\t.8byte %d\n"
+      "\t.byte 0xe\n\t.uleb128 %d\n"
+      "\t.byte 0x81\n\t.uleb128 2*6\n"  /* offset ra */,
+      fcofs, CFRAME_SIZE);
+    for (i = 27; i >= 18; i--)  /* offset x27-x18 (s11-s2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
+    fprintf(ctx->fp,
+      "\t.byte 0x89\n\t.uleb128 2*17\n"  /* offset x9 (s1) */
+      "\t.byte 0x88\n\t.uleb128 2*18\n"  /* offset x8 (s0/fp) */);
+    for (i = 27; i >= 18; i--)  /* offset f27-f18 (fs11-fs2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
+    fprintf(ctx->fp,
+      "\t.byte 0x89+32\n\t.uleb128 2*29\n"  /* offset f9 (fs1) */
+      "\t.byte 0x88+32\n\t.uleb128 2*30\n"  /* offset f8 (fs0) */
+      "\t.align 3\n"
+      ".LEFDE0:\n\n");
+#if LJ_HASFFI
+    /*
+    ** Address and range are 8 bytes wide here, matching .LSFDE0 above:
+    ** .debug_frame uses plain target-sized addresses, not the 4-byte
+    ** pc-relative form used in .eh_frame.
+    */
+    fprintf(ctx->fp,
+      ".LSFDE1:\n"
+      "\t.4byte .LEFDE1-.LASFDE1\n"
+      ".LASFDE1:\n"
+      "\t.4byte .Lframe0\n"
+      "\t.8byte lj_vm_ffi_call\n"
+      "\t.8byte %d\n"
+      "\t.byte 0x81\n\t.uleb128 2*1\n"  /* offset ra */
+      "\t.byte 0x92\n\t.uleb128 2*2\n"  /* offset x18 */
+      "\t.byte 0xd\n\t.uleb128 0x12\n"
+      "\t.align 3\n"
+      ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+    fprintf(ctx->fp,
+      ".Lframe1:\n"
+      "\t.4byte .LECIE1-.LSCIE1\n"
+      ".LSCIE1:\n"
+      "\t.4byte 0\n"
+      "\t.byte 0x1\n"
+      "\t.string \"zPR\"\n"
+      "\t.uleb128 0x1\n"
+      "\t.sleb128 -4\n"
+      "\t.byte 1\n"  /* Return address is in ra. */
+      "\t.uleb128 6\n"  /* augmentation length */
+      "\t.byte 0x1b\n"
+      "\t.4byte lj_err_unwind_dwarf-.\n"
+      "\t.byte 0x1b\n"
+      "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"  /* def_cfa sp 0 */
+      "\t.align 2\n"
+      ".LECIE1:\n\n");
+    fprintf(ctx->fp,
+      ".LSFDE2:\n"
+      "\t.4byte .LEFDE2-.LASFDE2\n"
+      ".LASFDE2:\n"
+      "\t.4byte .LASFDE2-.Lframe1\n"
+      "\t.4byte .Lbegin-.\n"
+      "\t.4byte %d\n"
+      "\t.uleb128 0\n"  /* augmentation length */
+      "\t.byte 0xe\n\t.uleb128 %d\n"
+      "\t.byte 0x81\n\t.uleb128 2*6\n",  /* offset ra */
+      fcofs, CFRAME_SIZE);
+    for (i = 27; i >= 18; i--)  /* offset x27-x18 (s11-s2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7));
+    fprintf(ctx->fp,
+      "\t.byte 0x89\n\t.uleb128 2*17\n"  /* offset x9 (s1) */
+      "\t.byte 0x88\n\t.uleb128 2*18\n"  /* offset x8 (s0/fp) */);
+    for (i = 27; i >= 18; i--)  /* offset f27-f18 (fs11-fs2) */
+      fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19));
+    fprintf(ctx->fp,
+      "\t.byte 0x89+32\n\t.uleb128 2*29\n"  /* offset f9 (fs1) */
+      "\t.byte 0x88+32\n\t.uleb128 2*30\n"  /* offset f8 (fs0) */
+      "\t.align 2\n"
+      ".LEFDE2:\n\n");
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+      ".Lframe2:\n"
+      "\t.4byte .LECIE2-.LSCIE2\n"
+      ".LSCIE2:\n"
+      "\t.4byte 0\n"
+      "\t.byte 0x1\n"
+      "\t.string \"zR\"\n"
+      "\t.uleb128 0x1\n"
+      "\t.sleb128 -4\n"
+      "\t.byte 1\n"  /* Return address is in ra. */
+      "\t.uleb128 1\n"  /* augmentation length */
+      "\t.byte 0x1b\n"
+      "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n"  /* def_cfa sp 0 */
+      "\t.align 2\n"
+      ".LECIE2:\n\n");
+    fprintf(ctx->fp,
+      ".LSFDE3:\n"
+      "\t.4byte .LEFDE3-.LASFDE3\n"
+      ".LASFDE3:\n"
+      "\t.4byte .LASFDE3- .Lframe2\n"
+      "\t.4byte lj_vm_ffi_call-.\n"
+      "\t.4byte %d\n"
+      "\t.uleb128 0\n"  /* augmentation length */
+      "\t.byte 0x81\n\t.uleb128 2*1\n"  /* offset ra */
+      "\t.byte 0x92\n\t.uleb128 2*2\n"  /* offset x18 */
+      "\t.byte 0xd\n\t.uleb128 0x12\n"
+      "\t.align 2\n"
+      ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+    break;
+  default:
+    break;
+  }
+}