From 34a74ff42de88ef9c8952099757a3097ccfcb727 Mon Sep 17 00:00:00 2001 From: gns Date: Tue, 5 Mar 2024 17:09:31 +0800 Subject: [PATCH 01/22] riscv(support): add RISC-V 64 arch base definition --- src/lj_arch.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/lj_arch.h b/src/lj_arch.h index 112c23269..b85d29e16 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -33,6 +33,8 @@ #define LUAJIT_ARCH_mips64 7 #define LUAJIT_ARCH_S390X 8 #define LUAJIT_ARCH_s390x 8 +#define LUAJIT_ARCH_RISCV64 9 +#define LUAJIT_ARCH_riscv64 9 /* Target OS. */ #define LUAJIT_OS_OTHER 0 @@ -69,6 +71,8 @@ #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 +#elif (defined(__riscv) || defined(__riscv__)) && __riscv_xlen == 64 +#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 #else #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif @@ -470,6 +474,20 @@ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_TARGET_GC64 1 #define LJ_ARCH_NOJIT 1 /* NYI */ +#elif LUAJIT_TARGET == LUAJIT_ARCH_RISCV64 + +#define LJ_ARCH_NAME "riscv64" +#define LJ_ARCH_BITS 64 +#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ +#define LJ_TARGET_RISCV64 1 +#define LJ_TARGET_GC64 1 +#define LJ_TARGET_EHRETREG 10 +#define LJ_TARGET_EHRAREG 1 +#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ + AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #else #error "No target architecture defined" @@ -554,6 +572,10 @@ #error "Only n64 ABI supported for MIPS64" #undef LJ_TARGET_MIPS #endif +#elif LJ_TARGET_RISCV64 +#if !defined(__riscv_float_abi_double) +#error "Only RISC-V 64 double float supported for now" +#endif #endif #endif From 155917ee55c60e4fa7f818a86cb50e2c0b37a377 Mon Sep 17 00:00:00 2001 From: gns Date: Tue, 5 Mar 2024 17:11:11 +0800 Subject: [PATCH 02/22] riscv(dynasm): add RISC-V support --- dynasm/dasm_riscv.h | 435 ++++++++++++++++++ dynasm/dasm_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++ dynasm/dasm_riscv32.lua | 12 + dynasm/dasm_riscv64.lua | 12 + 4 files changed, 1438 insertions(+) create mode 100644 dynasm/dasm_riscv.h create mode 100644 dynasm/dasm_riscv.lua create mode 100644 dynasm/dasm_riscv32.lua create mode 100644 dynasm/dasm_riscv64.lua diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h new file mode 100644 index 000000000..b2739fdbb --- /dev/null +++ b/dynasm/dasm_riscv.h @@ -0,0 +1,435 @@ +/* +** DynASM RISC-V encoding engine. +** Copyright (C) 2005-2025 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +#include +#include +#include +#include + +#define DASM_ARCH "riscv" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals. */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl; + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imms(int n) +{ + return (n >= -2048 && n < 2048) ? n : 4096; +} +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 20); + if (action >= DASM__MAX || (ins & 0xf)) { + ofs += 4; + } else { + ins >>= 4; + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: +#ifdef DASM_CHECKS + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); +#endif + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if (ins & 0x8000) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMS: +#ifdef DASM_CHECKS + CK(dasm_imms(n) != 4096, RANGE_I); +#endif + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 20); + if (ins & 0xf) continue; else ins >>= 4; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMMS: pos++; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + if (ins & 0xf) { *cp++ = ins; continue; } + unsigned int action = (ins >> 20); + unsigned int val = (ins >> 4); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (val & 2047), 1); + goto patchrel; + case DASM_ALIGN: + val &= 255; while ((((char *)cp - base) & val)) *cp++ = 0x60000000; + break; + case DASM_REL_LG: + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; + patchrel: + if (val & 2048) { /* B */ + CK((n & 1) == 0 && ((n + 0x1000) >> 13) == 0, RANGE_REL); + cp[-1] |= ((n << 19) & 0x80000000) | ((n << 20) & 0x7e000000) + | ((n << 7) & 0x00000f00) | ((n >> 4) & 0x00000080); + } else { /* J */ + CK((n & 1) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 11) & 0x80000000) | ((n << 20) & 0x7fe00000) + | ((n << 9) & 0x00100000) | (n & 0x000ff000); + } + break; + case DASM_LABEL_LG: + val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((val>>5)&31))-1)) << (val&31); + break; + case DASM_IMMS: + cp[-1] |= (((n << 20) & 0xfe000000) | ((n << 7) & 0x00000f80)); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua new file mode 100644 index 000000000..4c8518f16 --- /dev/null +++ b/dynasm/dasm_riscv.lua @@ -0,0 +1,979 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +-- +-- Contributed by gns from PLCT Lab, ISCAS. +------------------------------------------------------------------------------ + +local riscv32 = riscv32 +local riscv64 = riscv64 + +-- Module information: +local _info = { + arch = riscv32 and "riscv32" or riscv64 and "riscv64", + description = "DynASM RISC-V module", + version = "1.5.0", + vernum = 10500, + release = "2022-07-12", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable = assert, setmetatable +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch = _s.match, _s.gmatch +local concat, sort = table.concat, table.sort +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex + +local function __orderedIndexGen(t) + local orderedIndex = {} + for key in pairs(t) do + table.insert(orderedIndex, key) + end + table.sort( orderedIndex ) + return orderedIndex +end + +local function __orderedNext(t, state) + local key = nil + if state == nil then + t.__orderedIndex = __orderedIndexGen(t) + key = t.__orderedIndex[1] + else + local j = 0 + for _,_ in pairs(t.__orderedIndex) do j = j + 1 end + for i = 1, j do + if t.__orderedIndex[i] == state then + key = t.__orderedIndex[i+1] + end + end + end + + if key then + return key, t[key] + end + + t.__orderedIndex = nil + return +end + +local function opairs(t) + return __orderedNext, t, nil +end + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMMS", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x100000 + (val or 0) * 16) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if band(n, 0xf) == 0 then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= -0x80000000 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. +local map_archdef = { + ra = "x1", sp = "x2", +} -- Ext. register name -> int. name. + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + if s == "x1" then return "ra" + elseif s == "x2" then return "sp" end + return s +end + +------------------------------------------------------------------------------ + +-- Template strings for RISC-V instructions. +local map_op = {} + +local map_op_rv32imafd = { + + -- RV32I + lui_2 = "00000037DU", + auipc_2 = "00000017DA", + + jal_2 = "0000006fDJ", + jalr_3 = "00000067DRJ", + -- pseudo-instrs + j_1 = "0000006fJ", + jal_1 = "000000efJ", + jr_1 = "00000067R", + jalr_1 = "000000e7R", + jalr_2 = "000000e7RJ", + + beq_3 = "00000063RrB", + bne_3 = "00001063RrB", + blt_3 = "00004063RrB", + bge_3 = "00005063RrB", + bltu_3 = "00006063RrB", + bgeu_3 = "00007063RrB", + -- pseudo-instrs + bnez_2 = "00001063RB", + beqz_2 = "00000063RB", + blez_2 = "00005063rB", + bgez_2 = "00005063RB", + bltz_2 = "00004063RB", + bgtz_2 = "00004063rB", + bgt_3 = "00004063rRB", + ble_3 = "00005063rRB", + bgtu_3 = "00006063rRB", + bleu_3 = "00007063rRB", + + lb_2 = "00000003DL", + lh_2 = "00001003DL", + lw_2 = "00002003DL", + lbu_2 = "00004003DL", + lhu_2 = "00005003DL", + + sb_2 = "00000023rS", + sh_2 = "00001023rS", + sw_2 = "00002023rS", + + addi_3 = "00000013DRI", + slti_3 = "00002013DRI", + sltiu_3 = "00003013DRI", + xori_3 = "00004013DRI", + ori_3 = "00006013DRI", + andi_3 = "00007013DRI", + slli_3 = "00001013DRi", + srli_3 = "00005013DRi", + srai_3 = "40005013DRi", + -- pseudo-instrs + seqz_2 = "00103013DR", + ["zext.b_2"] = "0ff07013DR", + + add_3 = "00000033DRr", + sub_3 = "40000033DRr", + sll_3 = "00001033DRr", + slt_3 = "00002033DRr", + sltu_3 = "00003033DRr", + xor_3 = "00004033DRr", + srl_3 = "00005033DRr", + sra_3 = "40005033DRr", + or_3 = "00006033DRr", + and_3 = "00007033DRr", + -- pseudo-instrs + snez_2 = "00003033Dr", + sltz_2 = "00002033DR", + sgtz_2 = "00002033Dr", + + ecall_0 = "00000073", + ebreak_0 = "00100073", + + nop_0 = "00000013", + li_2 = "00000013DI", + mv_2 = "00000013DR", + not_2 = "fff04013DR", + neg_2 = "40000033Dr", + ret_0 = "00008067", + + -- RV32M + mul_3 = "02000033DRr", + mulh_3 = "02001033DRr", + mulhsu_3 = "02002033DRr", + mulhu_3 = "02003033DRr", + div_3 = "02004033DRr", + divu_3 = "02005033DRr", + rem_3 = "02006033DRr", + remu_3 = "02007033DRr", + + -- RV32A + ["lr.w_2"] = "c0000053FR", + ["sc.w_2"] = "c0001053FRr", + ["amoswap.w_3"] = "c0002053FRr", + ["amoadd.w_3"] = "c0003053FRr", + ["amoxor.w_3"] = "c0004053FRr", + ["amoor.w_3"] = "c0005053FRr", + ["amoand.w_3"] = "c0006053FRr", + ["amomin.w_3"] = "c0007053FRr", + ["amomax.w_3"] = "c0008053FRr", + ["amominu.w_3"] = "c0009053FRr", + ["amomaxu.w_3"] = "c000a053FRr", + + -- RV32F + ["flw_2"] = "00002007FL", + ["fsw_2"] = "00002027gS", + + ["fmadd.s_4"] = "00000043FGgH", + ["fmsub.s_4"] = "00000047FGgH", + ["fnmsub.s_4"] = "0000004bFGgH", + ["fnmadd.s_4"] = "0000004fFGgH", + ["fmadd.s_5"] = "00000043FGgHM", + ["fmsub.s_5"] = "00000047FGgHM", + ["fnmsub.s_5"] = "0000004bFGgHM", + ["fnmadd.s_5"] = "0000004fFGgHM", + + ["fadd.s_3"] = "00000053FGg", + ["fsub.s_3"] = "08000053FGg", + ["fmul.s_3"] = "10000053FGg", + ["fdiv.s_3"] = "18000053FGg", + ["fsqrt.s_2"] = "58000053FG", + ["fadd.s_4"] = "00000053FGgM", + ["fsub.s_4"] = "08000053FGgM", + ["fmul.s_4"] = "10000053FGgM", + ["fdiv.s_4"] = "18000053FGgM", + ["fsqrt.s_3"] = "58000053FGM", + + ["fsgnj.s_3"] = "20000053FGg", + ["fsgnjn.s_3"] = "20001053FGg", + ["fsgnjx.s_3"] = "20002053FGg", + + ["fmin.s_3"] = "28000053FGg", + ["fmax.s_3"] = "28001053FGg", + + ["fcvt.w.s_2"] = "c0000053DG", + ["fcvt.wu.s_2"] = "c0100053DG", + ["fcvt.w.s_3"] = "c0000053DGM", + ["fcvt.wu.s_3"] = "c0100053DGM", + ["fmv.x.w_2"] = "e0000053DG", + + ["feq.s_3"] = "a0002053DGg", + ["flt.s_3"] = "a0001053DGg", + ["fle.s_3"] = "a0000053DGg", + + ["fclass.s_2"] = "e0001053DG", + + ["fcvt.s.w_2"] = "d0000053FR", + ["fcvt.s.wu_2"] = "d0100053FR", + ["fcvt.s.w_3"] = "d0000053FRM", + ["fcvt.s.wu_3"] = "d0100053FRM", + ["fmv.w.x_2"] = "f0000053FR", + + -- RV32D + ["fld_2"] = "00003007FL", + ["fsd_2"] = "00003027gS", + + ["fmadd.d_4"] = "02000043FGgH", + ["fmsub.d_4"] = "02000047FGgH", + ["fnmsub.d_4"] = "0200004bFGgH", + ["fnmadd.d_4"] = "0200004fFGgH", + ["fmadd.d_5"] = "02000043FGgHM", + ["fmsub.d_5"] = "02000047FGgHM", + ["fnmsub.d_5"] = "0200004bFGgHM", + ["fnmadd.d_5"] = "0200004fFGgHM", + + ["fadd.d_3"] = "02000053FGg", + ["fsub.d_3"] = "0a000053FGg", + ["fmul.d_3"] = "12000053FGg", + ["fdiv.d_3"] = "1a000053FGg", + ["fsqrt.d_2"] = "5a000053FG", + ["fadd.d_4"] = "02000053FGgM", + ["fsub.d_4"] = "0a000053FGgM", + ["fmul.d_4"] = "12000053FGgM", + ["fdiv.d_4"] = "1a000053FGgM", + ["fsqrt.d_3"] = "5a000053FGM", + + ["fsgnj.d_3"] = "22000053FGg", + ["fsgnjn.d_3"] = "22001053FGg", + ["fsgnjx.d_3"] = "22002053FGg", + ["fmin.d_3"] = "2a000053FGg", + ["fmax.d_3"] = "2a001053FGg", + ["fcvt.s.d_2"] = "40100053FG", + ["fcvt.d.s_2"] = "42000053FG", + ["feq.d_3"] = "a2002053DGg", + ["flt.d_3"] = "a2001053DGg", + ["fle.d_3"] = "a2000053DGg", + ["fclass.d_2"] = "e2001053DG", + ["fcvt.w.d_2"] = "c2000053DG", + ["fcvt.wu.d_2"] = "c2100053DG", + ["fcvt.d.w_2"] = "d2000053FR", + ["fcvt.d.wu_2"] = "d2100053FR", + ["fcvt.w.d_3"] = "c2000053DGM", + ["fcvt.wu.d_3"] = "c2100053DGM", + ["fcvt.d.w_3"] = "d2000053FRM", + ["fcvt.d.wu_3"] = "d2100053FRM", + + ["fmv.d_2"] = "22000053FY", + ["fneg.d_2"] = "22001053FY", + ["fabs.d_2"] = "22002053FY", + +} + +local map_op_rv64imafd = { + + -- RV64I + lwu_2 = "00006003DL", + ld_2 = "00003003DL", + + sd_2 = "00003023rS", + + slli_3 = "00001013DRj", + srli_3 = "00005013DRj", + srai_3 = "40005013DRj", + + addiw_3 = "0000001bDRI", + slliw_3 = "0000101bDRi", + srliw_3 = "0000501bDRi", + sraiw_3 = "4000501bDRi", + + addw_3 = "0000003bDRr", + subw_3 = "4000003bDRr", + sllw_3 = "0000103bDRr", + srlw_3 = "0000503bDRr", + sraw_3 = "4000503bDRr", + + negw_2 = "4000003bDr", + ["sext.w_2"] = "0000001bDR", + + -- RV64M + mulw_3 = "0200003bDRr", + divw_3 = "0200403bDRr", + divuw_3 = "0200503bDRr", + remw_3 = "0200603bDRr", + remuw_3 = "0200703bDRr", + + -- RV64A + ["lr.d_2"] = "c2000053FR", + ["sc.d_2"] = "c2001053FRr", + ["amoswap.d_3"] = "c2002053FRr", + ["amoadd.d_3"] = "c2003053FRr", + ["amoxor.d_3"] = "c2004053FRr", + ["amoor.d_3"] = "c2005053FRr", + ["amoand.d_3"] = "c2006053FRr", + ["amomin.d_3"] = "c2007053FRr", + ["amomax.d_3"] = "c2008053FRr", + ["amominu.d_3"] = "c2009053FRr", + ["amomaxu.d_3"] = "c200a053FRr", + + -- RV64F + ["fcvt.l.s_2"] = "c0200053DG", + ["fcvt.lu.s_2"] = "c0300053DG", + ["fcvt.l.s_3"] = "c0200053DGM", + ["fcvt.lu.s_3"] = "c0300053DGM", + ["fcvt.s.l_2"] = "d0200053FR", + ["fcvt.s.lu_2"] = "d0300053FR", + ["fcvt.s.l_3"] = "d0200053FRM", + ["fcvt.s.lu_3"] = "d0300053FRM", + + -- RV64D + ["fcvt.l.d_2"] = "c2200053DG", + ["fcvt.lu.d_2"] = "c2300053DG", + ["fcvt.l.d_3"] = "c2200053DGM", + ["fcvt.lu.d_3"] = "c2300053DGM", + ["fmv.x.d_2"] = "e2000053DG", + ["fcvt.d.l_2"] = "d2200053FR", + ["fcvt.d.lu_2"] = "d2300053FR", + ["fcvt.d.l_3"] = "d2200053FRM", + ["fcvt.d.lu_3"] = "d2300053FRM", + ["fmv.d.x_2"] = "f2000053FR", + +} + +local map_op_zicsr = { + csrrw_3 = "00001073DCR", + csrrs_3 = "00002073DCR", + csrrc_3 = "00003073DCR", + csrrwi_3 = "00005073DCu", + csrrsi_3 = "00006073DCu", + csrrci_3 = "00007073DCu", + + -- pseudo-ops + csrrw_2 = "00001073DC", + csrrs_2 = "00002073CR", + csrrc_2 = "00003073CR", + csrrwi_2 = "00005073Cu", + csrrsi_2 = "00006073Cu", + csrrci_2 = "00007073Cu", + + rdinstret_1 = "C0202073D", + rdcycle_1 = "C0002073D", + rdtime_1 = "C0102073D", + rdinstreth_1 = "C8202073D", + rdcycleh_1 = "C8002073D", + rdtimeh_1 = "C8102073D", + + frcsr_1 = "00302073D", + fscsr_2 = "00301073DR", + fscsr_1 = "00301073R", + frrm_1 = "00202073D", + fsrm_2 = "00201073DR", + fsrm_1 = "00201073R", + fsrmi_2 = "00205073Du", + fsrmi_1 = "00205073u", + frflags_1 = "00102073D", + fsflags_2 = "00101073DR", + fsflagsi_2 = "00105073Du", + fsflagsi_1 = "00105073u", +} + +local map_op_zifencei = { + ["fence.i_3"] = "0000100fDRI", +} + +local list_map_op_rv32 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_zifencei, ['c'] = map_op_zicsr } +local list_map_op_rv64 = { ['a'] = map_op_rv32imafd, ['b'] = map_op_rv64imafd, ['c'] = map_op_zifencei, ['d'] = map_op_zicsr } + +if riscv32 then for _, map in opairs(list_map_op_rv32) do + for k, v in pairs(map) do map_op[k] = v end + end +end +if riscv64 then for _, map in opairs(list_map_op_rv64) do + for k, v in pairs(map) do map_op[k] = v end + end +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(x[1-3]?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^x([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_fpr(expr) + local r = match(expr, "^f([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_imm(imm, bits, shift, scale, signed, action) + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[xf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) + return 0 + end +end + +local function parse_csr(expr) + local r = match(expr, "^([1-4]?[0-9]?[0-9]?[0-9])$") + if r then + r = tonumber(r) + if r <= 4095 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_imms(imm) + local n = tonumber(imm) + if n then + if n >= -2048 and n < 2048 then + local imm5, imm7 = band(n, 0x1f), shr(band(n, 0xfe0), 5) + return shl(imm5, 7) + shl(imm7, 25) + end + werror("out of range immediate `"..imm.."'") + elseif match(imm, "^[xf]([1-3]?[0-9])$") or + match(imm, "^([%w_]+):([xf][1-3]?[0-9])$") then + werror("expected immediate operand, got register") + else + waction("IMMS", 0, imm); return 0 + end +end + +local function parse_rm(mode) + local rnd_mode = { + rne = 0, rtz = 1, rdn = 2, rup = 3, rmm = 4, dyn = 7 + } + local n = rnd_mode[mode] + if n then return n + else werror("bad rounding mode `"..mode.."'") end +end + +local function parse_disp(disp, mode) + local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") + if imm then + local r = shl(parse_gpr(reg), 15) + local extname = match(imm, "^extern%s+(%S+)$") + if extname then + waction("REL_EXT", map_extern[extname], nil, 1) + return r + else + if mode == "load" then + return r + parse_imm(imm, 12, 20, 0, true) + elseif mode == "store" then + return r + parse_imms(imm) + else + werror("bad displacement mode '"..mode.."'") + end + end + end + local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local r, tp = parse_gpr(reg) + if tp then + if mode == "load" then + waction("IMM", 32768+12*32+20, format(tp.ctypefmt, tailr)) + elseif mode == "store" then + waction("IMMS", 0, format(tp.ctypefmt, tailr)) + else + werror("bad displacement mode '"..mode.."'") + end + return shl(r, 15) + end + end + werror("bad displacement `"..disp.."'") +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +map_op[".template__"] = function(params, template, nparams) + if not params then return sub(template, 9) end + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 2 positions (ins/ext). + if secpos+2 > maxsecpos then wflush() end + local pos = wpos() + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + if p == "D" then -- gpr rd + op = op + shl(parse_gpr(params[n]), 7); n = n + 1 + elseif p == "R" then -- gpr rs1 + op = op + shl(parse_gpr(params[n]), 15); n = n + 1 + elseif p == "r" then -- gpr rs2 + op = op + shl(parse_gpr(params[n]), 20); n = n + 1 + elseif p == "F" then -- fpr rd + op = op + shl(parse_fpr(params[n]), 7); n = n + 1 + elseif p == "G" then -- fpr rs1 + op = op + shl(parse_fpr(params[n]), 15); n = n + 1 + elseif p == "g" then -- fpr rs2 + op = op + shl(parse_fpr(params[n]), 20); n = n + 1 + elseif p == "H" then -- fpr rs3 + op = op + shl(parse_fpr(params[n]), 27); n = n + 1 + elseif p == "C" then -- csr + op = op + shl(parse_csr(params[n]), 20); n = n + 1 + elseif p == "M" then -- fpr rounding mode + op = op + shl(parse_rm(params[n]), 12); n = n + 1 + elseif p == "Y" then -- fpr psuedo-op + local r = parse_fpr(params[n]) + op = op + shl(r, 15) + shl(r, 20); n = n + 1 + elseif p == "I" then -- I-type imm12 + op = op + parse_imm(params[n], 12, 20, 0, true); n = n + 1 + elseif p == "i" then -- I-type shamt5 + op = op + parse_imm(params[n], 5, 20, 0, false); n = n + 1 + elseif p == "j" then -- I-type shamt6 + op = op + parse_imm(params[n], 6, 20, 0, false); n = n + 1 + elseif p == "u" then -- I-type uimm + op = op + parse_imm(params[n], 5, 15, 0, false); n = n + 1 + elseif p == "U" then -- U-type imm20 + op = op + parse_imm(params[n], 20, 12, 0, false); n = n + 1 + elseif p == "L" then -- load + op = op + parse_disp(params[n], "load"); n = n + 1 + elseif p == "S" then -- store + op = op + parse_disp(params[n], "store"); n = n + 1 + elseif p == "B" or p == "J" then -- control flow + local mode, m, s = parse_label(params[n], false) + if p == "B" then m = m + 2048 end + waction("REL_"..mode, m, s, 1); n = n + 1 + elseif p == "A" then -- AUIPC + local mode, m, s = parse_label(params[n], false) + waction("REL_"..mode, m, s, 1); n = n + 1 + else + assert(false) + end + end + wputpos(pos, op) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua new file mode 100644 index 000000000..f194ce1dc --- /dev/null +++ b/dynasm/dasm_riscv32.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 32 module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 32 bit mode for the combined RISC-V module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +riscv32 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua new file mode 100644 index 000000000..25274395d --- /dev/null +++ b/dynasm/dasm_riscv64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 64 module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined RISC-V module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +riscv64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") From 3f2a9a27ebc9490b9f3cb77663b8c03655527d47 Mon Sep 17 00:00:00 2001 From: gns Date: Tue, 5 Mar 2024 18:03:33 +0800 Subject: [PATCH 03/22] riscv(interp): add register definition --- src/vm_riscv64.dasc | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 src/vm_riscv64.dasc diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc new file mode 100644 index 000000000..41c14eebb --- /dev/null +++ b/src/vm_riscv64.dasc @@ -0,0 +1,81 @@ +|// Low-level VM code for RISC-V 64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +|// +|// Contributed by gns from PLCT Lab, ISCAS. +| +|.arch riscv64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// Don't use: x0 = 0, x1 = ra, x2 = sp, x3 = gp, x4 = tp +| +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, x18 // Base of current Lua stack frame. +|.define KBASE, x19 // Constants of current Lua function. +|.define PC, x20 // Next PC. +|.define GLREG, x21 // Global state. +|.define DISPATCH, x22 // Opcode dispatch table. +|.define LREG, x23 // Register holding lua_State (also in SAVE_L). +|.define MULTRES, x24 // Size of multi-result: (nresults+1)*8. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, x8 +|.define TISNUM, x25 +|.define TOBIT, f27 // 2^52 + 2^51. +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, x9 // Callee-save. +|.define RB, x14 +|.define RC, x15 +|.define RD, x16 +|.define INS, x17 +| +|.define TMP0, x6 +|.define TMP1, x7 +|.define TMP2, x28 +|.define TMP3, x29 +|.define TMP4, x30 +| +|// RISC-V lp64d calling convention. +|.define CFUNCADDR, x5 +|.define CARG1, x10 +|.define CARG2, x11 +|.define CARG3, x12 +|.define CARG4, x13 +|.define CARG5, x14 +|.define CARG6, x15 +|.define CARG7, x16 +|.define CARG8, x17 +| +|.define CRET1, x10 +|.define CRET2, x11 +| +|.define FARG1, f10 +|.define FARG2, f11 +|.define FARG3, f12 +|.define FARG4, f13 +|.define FARG5, f14 +|.define FARG6, f15 +|.define FARG7, f16 +|.define FARG8, f17 +| +|.define FRET1, f10 +|.define FRET2, f11 +| +|.define FTMP0, f0 +|.define FTMP1, f1 +|.define FTMP2, f2 +|.define FTMP3, f3 +|.define FTMP4, f4 From ac5e7a826c881028bc42b4fc1ff474f6c22253f6 Mon Sep 17 00:00:00 2001 From: gns Date: Tue, 5 Mar 2024 18:05:22 +0800 Subject: [PATCH 04/22] riscv(interp): add frame definition --- src/lj_frame.h | 9 +++++ src/vm_riscv64.dasc | 83 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/src/lj_frame.h b/src/lj_frame.h index 2fb1b2f3e..440e83c36 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -287,6 +287,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ ** need to change to 3. */ #define CFRAME_SHIFT_MULTRES 0 +#elif LJ_TARGET_RISCV64 +#define CFRAME_OFS_ERRF 252 +#define CFRAME_OFS_NRES 248 +#define CFRAME_OFS_PREV 240 +#define CFRAME_OFS_L 232 +#define CFRAME_OFS_PC 224 +#define CFRAME_OFS_MULTRES 0 +#define CFRAME_SIZE 256 +#define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" #endif diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 41c14eebb..63977f5f6 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -79,3 +79,86 @@ |.define FTMP2, f2 |.define FTMP3, f3 |.define FTMP4, f4 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|// RISC-V 64 lp64d. +| +|.define CFRAME_SPACE, 256 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 252 // 32 bit values. +|.define SAVE_NRES, 248 +|.define SAVE_CFRAME, 240 // 64 bit values. +|.define SAVE_L, 232 +|.define SAVE_PC, 224 +|//----- 16 byte aligned +|// Padding 216 +|.define SAVE_GPR_, 112 // .. 112+13*8: 64 bit GPR saves. +|.define SAVE_FPR_, 16 // .. 16+12*8: 64 bit FPR saves. +| +| +|.define TMPD, 0 +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|//----------------------------------------------------------------------- +| +|.macro saveregs +| addi sp, sp, -CFRAME_SPACE +| fsd f27, SAVE_FPR_+11*8(sp) +| fsd f26, SAVE_FPR_+10*8(sp) +| fsd f25, SAVE_FPR_+9*8(sp) +| fsd f24, SAVE_FPR_+8*8(sp) +| fsd f23, SAVE_FPR_+7*8(sp) +| fsd f22, SAVE_FPR_+6*8(sp) +| fsd f21, SAVE_FPR_+5*8(sp) +| fsd f20, SAVE_FPR_+4*8(sp) +| fsd f19, SAVE_FPR_+3*8(sp) +| fsd f18, SAVE_FPR_+2*8(sp) +| fsd f9, SAVE_FPR_+1*8(sp) +| fsd f8, SAVE_FPR_+0*8(sp) +| sd ra, SAVE_GPR_+12*8(sp) +| sd x27, SAVE_GPR_+11*8(sp) +| sd x26, SAVE_GPR_+10*8(sp) +| sd x25, SAVE_GPR_+9*8(sp) +| sd x24, SAVE_GPR_+8*8(sp) +| sd x23, SAVE_GPR_+7*8(sp) +| sd x22, SAVE_GPR_+6*8(sp) +| sd x21, SAVE_GPR_+5*8(sp) +| sd x20, SAVE_GPR_+4*8(sp) +| sd x19, SAVE_GPR_+3*8(sp) +| sd x18, SAVE_GPR_+2*8(sp) +| sd x9, SAVE_GPR_+1*8(sp) +| sd x8, SAVE_GPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret +| ld ra, SAVE_GPR_+12*8(sp) +| ld x27, SAVE_GPR_+11*8(sp) +| ld x26, SAVE_GPR_+10*8(sp) +| ld x25, SAVE_GPR_+9*8(sp) +| ld x24, SAVE_GPR_+8*8(sp) +| ld x23, SAVE_GPR_+7*8(sp) +| ld x22, SAVE_GPR_+6*8(sp) +| ld x21, SAVE_GPR_+5*8(sp) +| ld x20, SAVE_GPR_+4*8(sp) +| ld x19, SAVE_GPR_+3*8(sp) +| ld x18, SAVE_GPR_+2*8(sp) +| ld x9, SAVE_GPR_+1*8(sp) +| ld x8, SAVE_GPR_+0*8(sp) +| fld f27, SAVE_FPR_+11*8(sp) +| fld f26, SAVE_FPR_+10*8(sp) +| fld f25, SAVE_FPR_+9*8(sp) +| fld f24, SAVE_FPR_+8*8(sp) +| fld f23, SAVE_FPR_+7*8(sp) +| fld f22, SAVE_FPR_+6*8(sp) +| fld f21, SAVE_FPR_+5*8(sp) +| fld f20, SAVE_FPR_+4*8(sp) +| fld f19, SAVE_FPR_+3*8(sp) +| fld f18, SAVE_FPR_+2*8(sp) +| fld f9, SAVE_FPR_+1*8(sp) +| fld f8, SAVE_FPR_+0*8(sp) +| addi sp, sp, CFRAME_SPACE +| ret +|.endmacro From 8a1761f781467752582678109db0f1ec4aea1e39 Mon Sep 17 00:00:00 2001 From: gns Date: Tue, 5 Mar 2024 18:07:52 +0800 Subject: [PATCH 05/22] riscv(interp): add helper macros and typedefs --- src/vm_riscv64.dasc | 353 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 63977f5f6..562d610dd 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -162,3 +162,356 @@ | addi sp, sp, CFRAME_SPACE | ret |.endmacro +| +|//----------------------------------------------------------------------- +| +|// Pseudo-instruction macros +|// Be cautious with local label 9 since we use them here! +|.macro bxeq, a, b, tgt +| bne a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxne, a, b, tgt +| beq a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxlt, a, b, tgt +| bge a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxge, a, b, tgt +| blt a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgt, a, b, tgt +| bge b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxle, a, b, tgt +| blt b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxltu, a, b, tgt +| bgeu a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgeu, a, b, tgt +| bltu a, b, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxgtu, a, b, tgt +| bgeu b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxleu, a, b, tgt +| bltu b, a, >9 +| j tgt +|9: +|.endmacro +| +|.macro bxeqz, a, tgt +| bxeq a, x0, tgt +|.endmacro +| +|.macro bxnez, a, tgt +| bxne a, x0, tgt +|.endmacro +| +|.macro bxlez, a, tgt +| bxge x0, a, tgt +|.endmacro +| +|.macro bxgez, a, tgt +| bxge a, x0, tgt +|.endmacro +| +|.macro bxltz, a, tgt +| bxlt a, x0, tgt +|.endmacro +| +|.macro bxgtz, a, tgt +| bxlt x0, a, tgt +|.endmacro +| +|.macro lxi, a, b +| lui a, (b)&0xfffff +| srai a, a, 12 +|.endmacro +| +|.macro lzi, a, b +| lui a, (b)&0xfffff +| srli a, a, 12 +|.endmacro +| +|.macro addxi, a, b, c +| lui x31, (c)&0xfffff +| srai x31, x31, 12 +| add a, x31, b +|.endmacro +| +|.macro sext.b, a, b +| slli a, b, 56 +| srai a, a, 56 +|.endmacro +| +|.macro sext.h, a, b +| slli a, b, 48 +| srai a, a, 48 +|.endmacro +| +|.macro zext.h, a, b +| slli a, b, 48 +| srli a, a, 48 +|.endmacro +| +|.macro zext.w, a, b +| slli a, b, 32 +| srli a, a, 32 +|.endmacro +| +|.macro bfextri, a, b, c, d +| slli a, b, (63-c) +| srli a, a, (d+63-c) +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State, GLREG +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; .long 0x00100073; .endmacro +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. RISC-V only has little endian ABI for now. +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +| +|// Instruction decode. +|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_BC4b, dst; slliw dst, dst, 2; .endmacro +|.macro decode_BC8b, dst; slliw dst, dst, 3; .endmacro +|.macro decode_RX8b, dst; andi dst, dst, 0x7f8; .endmacro +| +|.macro decode_OP8a, dst, ins; decode_OP1 dst, ins; .endmacro +|.macro decode_OP8b, dst; decode_BC8b dst; .endmacro +|.macro decode_RA8a, dst, ins; srliw dst, ins, 5; .endmacro +|.macro decode_RA8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RB8a, dst, ins; srliw dst, ins, 21; .endmacro +|.macro decode_RB8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RC8a, dst, ins; srliw dst, ins, 13; .endmacro +|.macro decode_RC8b, dst; decode_RX8b dst; .endmacro +|.macro decode_RD8a, dst, ins; srliw dst, ins, 16; .endmacro +|.macro decode_RD4b, dst; decode_BC4b dst; .endmacro +|.macro decode_RD8b, dst; decode_BC8b dst; .endmacro +|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro +| +|.macro decode_OP8, dst, ins; decode_OP1 dst, ins; decode_BC8b dst; .endmacro +|.macro decode_RA8, dst, ins; decode_RA8a dst, ins; decode_RA8b dst; .endmacro +|.macro decode_RB8, dst, ins; decode_RB8a dst, ins; decode_RB8b dst; .endmacro +|.macro decode_RC8, dst, ins; decode_RC8a dst, ins; decode_RC8b dst; .endmacro +|.macro decode_RD8, dst, ins; decode_RD8a dst, ins; decode_RD8b dst; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| lw INS, 0(PC) +| addi PC, PC, 4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 +| decode_OP8 TMP1, INS +| add TMP0, DISPATCH, TMP1 +| decode_RD8a RD, INS +| ld TMP4, 0(TMP0) +| decode_RA8a RA, INS +| decode_RD8b RD +| decode_RA8b RA +| jr TMP4 +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| j ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| j ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ld PC, LFUNC:RB->pc +| lw INS, 0(PC) +| addi PC, PC, 4 +| decode_OP8 TMP1, INS +| decode_RA8 RA, INS +| add TMP0, DISPATCH, TMP1 +| ld TMP0, 0(TMP0) +| add RA, RA, BASE +| jr TMP0 +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| sd PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD +| srliw TMP0, RD, 1 +| lui TMP4, (-(BCBIAS_J*4 >> 12)) & 0xfffff +| addw TMP0, TMP0, TMP4 +| add PC, PC, TMP0 +|.endmacro +| +|// Assumes J is relative to GL. Some J members might be out of range though. +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro call_intern, curfunc, func +|->curfunc .. _pcrel_ .. func: +| auipc CFUNCADDR, extern %pcrel_hi(func) +| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) +|.endmacro +|.macro call_extern, func +| call extern func +| empty +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro +|.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target +| ld tmp, GL->gc.grayagain +| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) +| sd tab, GL->gc.grayagain +| sb mark, tab->marked +| sd tmp, tab->gclist +| j target +|.endmacro +| +|// Clear type tag. Isolate lowest 64-17=47 bits of reg. +|.macro cleartp, reg; slli reg, reg, 17; srli reg, reg, 17; .endmacro +|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. +|.macro settp_a, dst; cleartp dst; .endmacro +|.macro settp_a, dst, src; cleartp dst, src; .endmacro +|.macro settp_b, dst, tp; +| slli x31, tp, 47 +| or dst, dst, x31 +|.endmacro +|.macro settp_b, dst, src, tp; +| slli x31, tp, 47 +| or dst, src, x31 +|.endmacro +|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro +|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; srai dst, src, 47; .endmacro +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target +| gettp TMP4, reg +| addi TMP4, TMP4, tp +| cleartp reg +| bxnez TMP4, target +|.endmacro +|.macro checktp, dst, reg, tp, target +| gettp TMP4, reg +| addi TMP4, TMP4, tp +| cleartp dst, reg +| bxnez TMP4, target +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target +| gettp TMP4, reg +| bxne TMP4, TISNUM, target +|.endmacro +|.macro checknum, reg, target +| gettp TMP4, reg +| sltiu TMP4, TMP4, LJ_TISNUM +| bxeqz TMP4, target +|.endmacro +| +|.macro mov_false, reg +| li reg, 0x001 +| slli reg, reg, 47 +| not reg, reg +|.endmacro +|.macro mov_true, reg +| li reg, 0x001 +| slli reg, reg, 48 +| not reg, reg +|.endmacro +| +|//----------------------------------------------------------------------- From 7dcaa4d77bd4f4068cff15e0421fda5166bce4d8 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 08:40:08 +0800 Subject: [PATCH 06/22] riscv(interp): add base assembly interpreter VM --- src/lj_vm.h | 3 + src/lj_vmmath.c | 3 +- src/vm_riscv64.dasc | 3584 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 3586 insertions(+), 4 deletions(-) diff --git a/src/lj_vm.h b/src/lj_vm.h index 63d094396..4b1cbf1d9 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]); #if LJ_TARGET_PPC void lj_vm_cachesync(void *start, void *end); #endif +#if LJ_TARGET_RISCV64 +void lj_vm_fence_rw_rw(); +#endif LJ_ASMF double lj_vm_foldarith(double x, double y, int op); #if LJ_HASJIT LJ_ASMF double lj_vm_foldfpm(double x, int op); diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index 3351e72b4..1ee32d018 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op) /* -- Helper functions for generated machine code ------------------------- */ -#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS \ + || LJ_TARGET_RISCV64 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 562d610dd..87327c798 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -437,9 +437,11 @@ | auipc CFUNCADDR, extern %pcrel_hi(func) | jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) |.endmacro -|.macro call_extern, func -| call extern func -| empty +|.macro call_extern, curfunc, func +|->curfunc .. _got_pcrel_ .. func: +| auipc CFUNCADDR, extern %got_pcrel_hi(func) +| ld CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _got_pcrel_ .. func)(CFUNCADDR) +| jalr CFUNCADDR |.endmacro | |// Set current VM state. Uses TMP0. @@ -515,3 +517,3579 @@ |.endmacro | |//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi TMP0, PC, FRAME_P + | + | // Return from pcall or xpcall fast func. + | mov_true TMP1 + | bxeqz TMP0, ->cont_dispatch + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | mv BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | addi RA, RA, -8 + | + |->vm_returnc: + | addiw RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | li CRET1, LUA_YIELD + | bxeqz RD, ->vm_unwind_c_eh + | mv MULTRES, RD + | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | andi TMP2, PC, ~FRAME_TYPEP + | xori TMP0, TMP0, FRAME_C + | sub TMP2, BASE, TMP2 // TMP2 = previous base. + | bxnez TMP0, ->vm_returnp + | + | addiw TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES(sp) + | addi BASE, BASE, -16 + | st_vmstate + | slliw TMP2, TMP2, 3 + | beqz TMP1, >2 + |1: + | addiw TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(BASE) + | addi BASE, BASE, 8 + | bnez TMP1, <1 + | + |2: + | bne TMP2, RD, >6 + |3: + | sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME(sp) // Restore previous C frame. + | mv CRET1, x0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | blt TMP2, RD, >7 + | // More results wanted. Check stack size and fill up results with nil. + | bge BASE, TMP1, >9 + | sd TISNIL, 0(BASE) + | addiw RD, RD, 8 + | addi BASE, BASE, 8 + | j <2 + | + |7: // Less results wanted. + | subw TMP0, RD, TMP2 + | sub TMP0, BASE, TMP0 // Either keep top or shrink it. + | beqz TMP2, >8 + | mv BASE, TMP0 // LUA_MULTRET+1 case + |8: + | j <3 + | + |9: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | + | sd BASE, L->top // Save current top held in BASE (yes). + | mv MULTRES, RD + | srliw CARG2, TMP2, 3 + | mv CARG1, L + | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) + | lw TMP2, SAVE_NRES(sp) + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | mv RD, MULTRES + | slliw TMP2, TMP2, 3 + | j <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mv sp, CARG1 + | mv CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | li TMP0, ~LJ_VMST_C + | ld GL, L->glref + | sw TMP0, GL->vmstate + | j ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | andi sp, CARG1, CFRAME_RAWMASK + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ld L, SAVE_L(sp) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld GL, L->glref // Setup pointer to global state. + | slli TMP3, TMP3, 32 + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | fmv.d.x TOBIT, TMP3 + | addi RA, BASE, -8 // Results start at BASE-8. + | addxi DISPATCH, GL, GG_G2DISP + | sd TMP1, -8(BASE) // Prepend false to error message. + | st_vmstate + | li RD, 16 // 2 results: false + error message. + | j ->vm_returnc + | + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | li CARG2, LUA_MINSTACK + | j >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | sd BASE, L->base + | addi PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srliw CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | sub RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mv L, CARG1 + | ld GL, L->glref // Setup pointer to global state. + | mv BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L(sp) + | li PC, FRAME_CP + | addi TMP0, sp, CFRAME_RESUME + | addxi DISPATCH, GL, GG_G2DISP + | sw x0, SAVE_NRES(sp) + | sw x0, SAVE_ERRF(sp) + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd x0, SAVE_CFRAME(sp) + | sd TMP0, L->cframe + | beqz TMP1, >3 + | + | // Resume after yield (like a return). + | sd L, GL->cur_L + | mv RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | sub RD, TMP1, BASE + | slli TMP3, TMP3, 32 + | sb x0, L->status + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | addi RD, RD, 8 + | st_vmstate + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | bxeqz TMP0, ->BC_RET_Z + | j ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF(sp) + | li PC, FRAME_CP + | j >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | mv L, CARG1 + | sw CARG3, SAVE_NRES(sp) + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_L(sp) + | mv BASE, CARG2 + | addxi DISPATCH, GL, GG_G2DISP + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, GL->cur_L + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | ld TMP1, L->top + | slli TMP3, TMP3, 32 + | add PC, PC, BASE + | sub NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | sub PC, PC, TMP2 // PC = frame delta + frame type + | fmv.d.x TOBIT, TMP3 + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mv L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L(sp) + | ld TMP1, L->top + | ld GL, L->glref // Setup pointer to global state. + | sd CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. + | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | addxi DISPATCH, GL, GG_G2DISP + | sw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. + | sw x0, SAVE_ERRF(sp) // No error function. + | sd TMP1, SAVE_CFRAME(sp) + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, GL->cur_L + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | mv BASE, CRET1 + | li PC, FRAME_CP + | bnez CRET1, <3 // Else continue with the call. + | j ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | mv RB, BASE + | mv BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | add TMP2, RA, RD + | ld TMP1, LFUNC:TMP1->pc + | sd TISNIL, -8(TMP2) // Ensure one valid arg. + | // BASE = base, RA = resultptr, RB = meta base + | ld KBASE, PC2PROTO(k)(TMP1) + | jr TMP0 // Jump to continuation. + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | addi CARG2, RB, -32 + | ld TMP0, 0(RA) + | decode_RB8 MULTRES, INS + | decode_RA8 RA, INS + | add TMP1, BASE, MULTRES + | sd BASE, L->base + | sub CARG3, CARG2, TMP1 + | sd TMP0, 0(CARG2) + | bxne TMP1, CARG2, ->BC_CAT_Z + | add RA, BASE, RA + | sd TMP0, 0(RA) + | j ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | addi CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tgetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tgetv, lj_meta_tget + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + | ld TMP0, 0(CRET1) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | addi TMP1, BASE, -FRAME_CONT + | li NARGS8:RC, 16 // 2 args for func(t, k). + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mv TMP1, TISNIL + | bxeqz CRET1, ->BC_TGETR_Z + | ld TMP1, 0(CRET1) + | j ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | addi, CARG3, GL, offsetof(global_State, tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsets: + | addi CARG2, GL, offsetof(global_State, tmptv) + | addi CARG3, GL, offsetof(global_State, tmptv2) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | settp STR:RC, TMP1 + | sd TAB:RB, 0(CARG2) + | sd STR:RC, 0(CARG3) + | j >1 + | + |->vmeta_tsetb: // TMP0 = index + | addi CARG3, GL, offsetof(global_State, tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, TValue *k) + | call_intern vmeta_tsetv, lj_meta_tset + | // Returns TValue * (finished) or NULL (metamethod). + | ld TMP2, 0(RA) + | beqz CRET1, >3 + | ins_next1 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | sd TMP2, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | addi TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | sub PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | li NARGS8:RC, 24 // 3 args for func(t, k, v) + | cleartp LFUNC:RB + | sd TMP2, 16(BASE) // Copy value to third argument. + | j ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCtab *t, int32_t key) + | call_intern vmeta_tsetr, lj_tab_setinth + | // Returns TValue *. + | j ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | mv CARG2, RA + | mv CARG3, RD + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | decode_OP1 CARG4, INS + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o1, *o2, int op) + | call_intern vmeta_comp, lj_meta_comp + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 + | bxeqz TMP1, ->vmeta_binop + | negw TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | addi PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 12)) & 0xfffff + | slliw RD, RD, 2 + | addw RD, RD, TMP1 + | and RD, RD, TMP2 + | add PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld TMP2, 0(RA) + | slliw TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP2, 0(TMP1) + | j ->cont_nop + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | negw TMP2, TMP1 // Branch if result is true. + | j <4 + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu TMP1, TMP0, LJ_TISTRUECOND + | addiw TMP2, TMP1, -1 // Branch if result is false. + | j <4 + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | mv CARG4, TMP0 + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | // (lua_State *L, GCobj *o1, *o2, int ne) + | call_intern vmeta_equal, lj_meta_equal + | // Returns 0/1 or TValue * (metamethod). + | j <3 + | + |->vmeta_istype: + | addi PC, PC, -4 + | sd BASE, L->base + | mv CARG1, L + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) + | // (lua_State *L, TValue *o, BCReg tp) + | call_intern vmeta_istype, lj_meta_istype + | j ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | mv RC, RB + | + |->vmeta_arith: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv CARG3, RB + | mv CARG4, RC + | decode_OP1 CARG5, INS + | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). + | bxeqz CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | mv TMP2, BASE + | addi PC, TMP1, FRAME_CONT + | mv BASE, CRET1 + | li NARGS8:RC, 16 // 2 args for func(o1, o2). + | j ->vm_call_dispatch + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | mv MULTRES, CARG1 +#endif + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) + | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. + | mv CARG1, MULTRES + | j ->BC_LEN_Z +#else + | j ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | mv CARG1, L + | sd TMP2, L->base // This is the callers base! + | addi CARG2, BASE, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, BASE, RC + | mv MULTRES, NARGS8:RC + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_call, lj_meta_call + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mv CARG1, L + | sd BASE, L->base + | addi CARG2, RA, -16 + | sd PC, SAVE_PC(sp) + | add CARG3, RA, RC + | mv MULTRES, NARGS8:RC + | // (lua_State *L, TValue *func, TValue *top) + | call_intern vmeta_callt, lj_meta_call + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:CARG3, RB + | j ->BC_CALLT_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mv CARG1, L + | sd BASE, L->base + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS + | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + | decode_RA8 RA, MULTRES + | decode_RD8 RD, MULTRES + | j =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | bxnez TMP0, ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bxnez TMP0, ->fff_fallback + | gettp TMP1, CARG1 + | gettp TMP2, CARG2 + | sltiu TMP1, TMP1, LJ_TISNUM + | sltiu TMP2, TMP2, LJ_TISNUM + | fld FARG1, 0(BASE) + | and TMP1, TMP1, TMP2 + | fld FARG2, 8(BASE) + | bxeqz TMP1, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. + |.macro ffgccheck + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | bltu TMP0, TMP1, >1 + | jal ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp TMP1, CARG1 + | sltiu TMP1, TMP1, LJ_TISTRUECOND + | addi RA, BASE, -16 + | bxeqz TMP1, ->fff_fallback + | ld PC, FRAME_PC(BASE) + | addiw RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | addi TMP1, BASE, 8 + | add TMP2, RA, RD + | sd CARG1, -16(BASE) + | bne BASE, TMP2, >1 + | j ->fff_res // Done if exactly 1 argument. + |1: + | ld TMP0, 0(TMP1) + | sd TMP0, -16(TMP1) + | mv TMP3, TMP1 + | addi TMP1, TMP1, 8 + | bne TMP3, TMP2, <1 + | j ->fff_res + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | not TMP3, TMP0 + | bltu TISNUM, TMP0, >1 + | li TMP3, ~LJ_TISNUM + |1: + | slli TMP3, TMP3, 3 + | add TMP3, CFUNC:RB, TMP3 + | ld CARG1, CFUNC:TMP3->upvalue + | j ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | addi TMP0, TMP2, -LJ_TTAB + | addi TMP1, TMP2, -LJ_TUDATA + | snez TMP0, TMP0 + | neg TMP0, TMP0 + | and TMP0, TMP0, TMP1 + | cleartp TAB:CARG1 + | bnez TMP0, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | li CARG1, LJ_TNIL + | bxeqz TAB:RB, ->fff_restv + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slli TMP0, TMP1, 5 + | slli TMP1, TMP1, 3 + | sub TMP1, TMP0, TMP1 + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | li TMP3, LJ_TTAB + | beq RC, TMP0, >5 + | bnez NODE:TMP2, <3 + |4: + | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: + | bxne CARG1, TISNIL, ->fff_restv + | j <4 // Ditto for nil value. + | + |6: + | sltiu TMP3, TMP2, LJ_TISNUM + | neg TMP4, TMP3 + | xor TMP0, TMP2, TISNUM // TMP2 = TMP3 ? TISNUM : TMP2 + | and TMP0, TMP0, TMP4 + | xor TMP2, TMP0, TMP2 + | slli TMP2, TMP2, 3 + | sub TMP0, GL, TMP2 + | ld TAB:RB, (offsetof(global_State, gcroot[GCROOT_BASEMT])-8)(TMP0) + | j <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | addi TMP3, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or TMP3, TMP3, TAB:TMP0 + | bxnez TMP3, ->fff_fallback + | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) + | sd TAB:CARG2, TAB:TMP1->metatable + | bxeqz TMP3, ->fff_restv + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu TMP0, NARGS8:RC, 16 + | gettp TMP1, CARG2 + | cleartp CARG2 + | addi TMP1, TMP1, -LJ_TTAB + | or TMP0, TMP0, TMP1 + | addi CARG3, BASE, 8 + | bxnez TMP0, ->fff_fallback + | mv CARG1, L + | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ld CARG1, 0(CRET1) + | j ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori TMP0, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP1, TISNUM, TMP1 + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback // No args or CARG1 is not number + | j ->fff_restv + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | gettp TMP0, CARG1 + | addi TMP1, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | bxeqz TMP1, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] + | sltu TMP0, TISNUM, TMP0 + | sd BASE, L->base // Add frame since C call can throw. + | or TMP0, TMP0, TMP1 + | bxnez TMP0, ->fff_fallback + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | ffgccheck + | mv CARG1, L + | mv CARG2, BASE + | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | li TMP1, LJ_TSTR + | ld BASE, L->base + | settp CARG1, TMP1 + | j ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, -LJ_TTAB, ->fff_fallback + | add TMP0, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) + | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. + | addi CARG2, BASE, 8 + | addi CARG3, BASE, -16 + | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | li RD, (2+1)*8 + | bxgtz CRET1, ->fff_res // Found key/value. + | mv TMP1, CRET1 + | mv CARG1, TISNIL + | bxeqz TMP1, ->fff_restv // End of traversal: return nil. + | ld CFUNC:RB, FRAME_FUNC(BASE) + | li RC, 2*8 + | cleartp CFUNC:RB + | j ->fff_fallback // Invalid key. + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bxnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | ld PC, FRAME_PC(BASE) + | sext.w TMP2, CARG2 + | addiw TMP2, TMP2, 1 + | sltu TMP3, TMP2, TMP0 + | zext.w TMP0, TMP2 + | settp_b TMP0, TISNUM + | sd TMP0, -16(BASE) + | beqz TMP3, >2 // Not in array part? + | slli TMP3, TMP2, 3 + | add TMP3, TMP1, TMP3 + | ld TMP1, 0(TMP3) + |1: + | li RD, (0+1)*8 + | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + | sd TMP1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | lw TMP0, TAB:CARG1->hmask + | li RD, (0+1)*8 + | bxeqz TMP0, ->fff_res + | mv CARG2, TMP2 + | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 + | bxeqz CRET1, ->fff_res + | ld TMP1, 0(CRET1) + | j <1 + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable +#endif + | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] +#if LJ_52 + | bxnez TAB:TMP2, ->fff_fallback +#endif + | slli TMP1, TISNUM, 47 + | sd CARG1, -8(BASE) + | sd TMP1, 0(BASE) + | sd CFUNC:TMP0, -16(BASE) + | li RD, (3+1)*8 + | j ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -8 + | lbu TMP3, GL->hookmask + | mv TMP2, BASE + | bxltz NARGS8:TMP0, ->fff_fallback + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 16 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | addi PC, TMP3, 16+FRAME_PCALL + | bxeqz NARGS8:RC, ->vm_call_dispatch + |1: + | add TMP0, BASE, NARGS8:RC + |2: + | ld TMP1, -16(TMP0) + | sd TMP1, -8(TMP0) + | addi TMP0, TMP0, -8 + | bne TMP0, BASE, <2 + | j ->vm_call_dispatch + | + |.ffunc xpcall + | ld TMP1, L->maxstack + | add TMP2, BASE, NARGS8:RC + | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | lbu TMP1, GL->hookmask + | bxltz NARGS8:TMP0, ->fff_fallback + | gettp TMP2, CARG2 + | addi TMP2, TMP2, -LJ_TFUNC + | bxnez TMP2, ->fff_fallback // Traceback must be a function. + | mv TMP2, BASE + | mv NARGS8:RC, NARGS8:TMP0 + | addi BASE, BASE, 24 + | // Remember active hook before pcall. + | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT + | sd CARG2, 0(TMP2) // Swap function and traceback. + | andi TMP3, TMP3, 1 + | sd CARG1, 8(TMP2) + | addi PC, TMP3, 24+FRAME_PCALL + | bnez NARGS8:RC, <1 + | j ->vm_call_dispatch + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ld L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | lbu TMP0, L:CARG1->status + | ld TMP1, L:CARG1->cframe + | ld CARG2, L:CARG1->top + | ld TMP2, L:CARG1->base + | addiw CARG4, TMP0, -LUA_YIELD + | add CARG3, CARG2, TMP0 + | addi TMP3, CARG2, 8 + | seqz TMP4, CARG4 + | neg TMP4, TMP4 + | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3 + | and CARG2, CARG2, TMP4 + | xor CARG2, TMP3, CARG2 + | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD? + | xor TMP2, TMP2, CARG3 + | or CARG4, TMP2, TMP0 + | bxnez TMP1, ->fff_fallback // cframe != 0? + | ld TMP0, L:CARG1->maxstack + | ld PC, FRAME_PC(BASE) + | bxeqz CARG4, ->fff_fallback // base == top && st == 0? + | add TMP2, CARG2, NARGS8:RC + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow? + |1: + |.if resume + | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. + | addi NARGS8:RC, NARGS8:RC, -8 + | addi TMP2, TMP2, -8 + |.endif + | sd TMP2, L:CARG1->top + | sd BASE, L->top + | add TMP1, BASE, NARGS8:RC + | mv CARG3, CARG2 + |2: // Move args to coroutine. + | ld TMP0, 0(BASE) + | sltu TMP3, BASE, TMP1 + | addi BASE, BASE, 8 + | beqz TMP3, >3 + | sd TMP0, 0(CARG3) + | addi CARG3, CARG3, 8 + | j <2 + |3: + | mv L:RA, L:CARG1 + | jal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ld TMP2, L:RA->base + | sltiu TMP1, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, GL->cur_L + | st_vmstate + | sub RD, TMP3, TMP2 + | beqz TMP1, >8 + | ld TMP0, L->maxstack + | add TMP1, BASE, RD + | beqz RD, >6 // No results? + | add TMP3, TMP2, RD + | bltu TMP0, TMP1, >9 // Need to grow stack? + | sd TMP2, L:RA->top // Clear coroutine stack. + | mv TMP1, BASE + |5: // Move results from coroutine. + | ld TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | sd TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu TMP2, TMP3, <5 + |6: + |.if resume + | mov_true TMP1 + | addi RD, RD, 16 + |7: + | sd TMP1, -8(BASE) // Prepend true/false to results. + | addi RA, BASE, -8 + |.else + | mv RA, BASE + | addi RD, RD, 8 + |.endif + | andi TMP0, PC, FRAME_TYPE + | sd PC, SAVE_PC(sp) + | mv MULTRES, RD + |// bxeqz TMP0, ->BC_RET_Z // Local label 9 in use + | bnez TMP0, >6 + | j ->BC_RET_Z + |6: + | j ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | addi TMP3, TMP3, -8 + | mov_false TMP1 + | li RD, (2+1)*8 + | ld TMP0, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | sd TMP0, 0(BASE) // Copy error message. + | j <7 + |.else + | mv CARG1, L + | mv CARG2, L:RA + | // (lua_State *L, lua_State *co) + | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err + |.endif + | + |9: // Handle stack expansion on return from yield. + | mv CARG1, L + | srliw CARG2, RD, 3 + | // (lua_State *L, int n) + |.if resume + | call_intern ff_coroutine_resume, lj_state_growstack + |.else + | call_intern ff_coroutine_wrap_aux, lj_state_growstack + |.endif + | mv CRET1, x0 + | j <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ld TMP0, L->cframe + | add TMP1, BASE, NARGS8:RC + | li CRET1, LUA_YIELD + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top + | bxeqz TMP0, ->fff_fallback + | sd x0, L->cframe + | sb CRET1, L->status + | j ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func, rm + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | bxeqz NARGS8:RC, ->fff_fallback + | fmv.d.x FARG1, CARG1 + | bxeq TMP0, TISNUM, ->fff_restv + | srli TMP1, CARG1, 52 // Extract exponent (and sign). + | bxgeu TMP0, TISNUM, ->fff_fallback + | andi TMP1, TMP1, 0x7ff // Extract exponent. + | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction + | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? + | fcvt.l.d TMP3, FARG1, rm + | fcvt.d.l FTMP1, TMP3 + | fsgnj.d FRET1, FTMP1, FARG1 + | j ->fff_resn + |.endmacro + | + | math_round floor, rdn + | math_round ceil, rup + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | addi TMP2, CARG2, -LJ_TISNUM + | sext.w TMP1, CARG1 + | bnez TMP2, >1 + | sraiw TMP0, TMP1, 31 // Extract sign. int + | xor TMP1, TMP1, TMP0 + | sub CARG1, TMP1, TMP0 + | slli TMP3, CARG1, 32 + | settp CARG1, TISNUM + | bxgez TMP3, ->fff_restv + | lui CARG1, 0x41e00 // 2^31 as a double. + | slli CARG1, CARG1, 32 + | j ->fff_restv + |1: + | sltiu TMP2, CARG2, LJ_TISNUM + | slli CARG1, CARG1, 1 + | srli CARG1, CARG1, 1 + | bxeqz TMP2, ->fff_fallback // int + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result. + | ld PC, FRAME_PC(BASE) + | sd CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi TMP0, PC, FRAME_TYPE + | mv MULTRES, RD + | addi RA, BASE, -16 + | bxnez TMP0, ->vm_return + | lw INS, -4(PC) + | decode_RB8 RB, INS + |5: + | bltu RD, RB, >6 // More results expected? + | decode_RA8a TMP0, INS + | ins_next1 + | decode_RA8b TMP0 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, TMP0 + | ins_next2 + | + |6: // Fill up results with nil. + | add TMP1, RA, RD + | addi RD, RD, 8 + | sd TISNIL, -8(TMP1) + | j <5 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | call_extern ff_math_extern, func + | j ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | call_extern ff_math_extern2, func + | j ->fff_resn + |.endmacro + | + |.ffunc_n math_sqrt + | fsqrt.d FRET1, FARG1 + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | fsd FRET1, -16(BASE) + | j ->fff_res1 + | + |.ffunc math_log + | li TMP1, 8 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. + | checknum CARG1, ->fff_fallback + | call_extern ff_math_log, log + | j ->fff_resn + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | fld FARG1, 0(BASE) + | lw CARG1, 8(BASE) + | call_extern ff_math_ldexp, ldexp // (double x, int exp) + | j ->fff_resn + | + |.ffunc_n math_frexp + | ld PC, FRAME_PC(BASE) + | addi CARG1, GL, offsetof(global_State, tmptv) + | call_extern ff_math_frexp, frexp + | lw TMP1, GL->tmptv + | fcvt.d.w FARG2, TMP1 + | fsd FRET1, -16(BASE) + | fsd FARG2, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.ffunc_n math_modf + | addi CARG1, BASE, -16 + | ld PC, FRAME_PC(BASE) + | call_extern ff_math_modf, modf + | fsd FRET1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res + | + |.macro math_minmax, name, ismax + | .ffunc_1 name + | add RB, BASE, NARGS8:RC + | addi RA, BASE, 8 + | checkint CARG1, >4 + |1: // Handle integers. + | ld CARG2, 0(RA) + | bxeq RA, RB, ->fff_restv + | sext.w CARG1, CARG1 + | checkint CARG2, >3 + | sext.w CARG2, CARG2 + | slt TMP0, CARG1, CARG2 + |.if ismax + | addi TMP1, TMP0, -1 + |.else + | neg TMP1, TMP0 + |.endif + | xor TMP2, CARG1, CARG2 // CARG1 = TMP1 ? CARG1 : CARG2 + | and TMP2, TMP2, TMP1 + | xor CARG1, CARG2, TMP2 + | addi RA, RA, 8 + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | j <1 + |3: // Convert intermediate result to number and continue below. + | fcvt.d.w FARG1, CARG1 + | checknum CARG2, ->fff_fallback + | fld FARG2, 0(RA) + | j >6 + | + |4: + | fld FARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |5: // Handle numbers. + | ld CARG2, 0(RA) + | fld FARG2, 0(RA) + | bxgeu RA, RB, ->fff_resn + | checknum CARG2, >7 + |6: + |.if ismax + | flt.d TMP0, FARG2, FARG1 + |.else // min + | flt.d TMP0, FARG1, FARG2 + |.endif + | bnez TMP0, >8 // skip swap + | fmv.d FARG1, FARG2 + |8: + | addi RA, RA, 8 + | j <5 + |7: // Convert integer to number and continue above. + | checkint CARG2, ->fff_fallback + | fcvt.d.w FARG2, CARG2 + | j <6 + |.endmacro + | + | math_minmax math_min, 0 + | math_minmax math_max, 1 + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 + | addi TMP0, TMP0, -LJ_TSTR + | or TMP1, TMP1, TMP0 + | cleartp STR:CARG1 + | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument. + | lw TMP0, STR:CARG1->len + | ld PC, FRAME_PC(BASE) + | snez RD, TMP0 + | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). + | addiw RD, RD, 1 + | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp_b TMP2, TISNUM + | sd TMP2, -16(BASE) + | j ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori TMP1, NARGS8:RC, 8 // Need exactly 1 argument. + | addi TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP2, 255 + | sext.w CARG1, CARG1 + | or TMP1, TMP1, TMP0 + | sltu TMP2, TMP2, CARG1 // !(255 < n). + | or TMP1, TMP1, TMP2 + | li CARG3, 1 + | bxnez TMP1, ->fff_fallback + | addi CARG2, sp, TMPD_OFS + | sb CARG1, TMPD(sp) + |->fff_newstr: + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, const char *str, size_t l) + | call_intern fff_newstr, lj_str_new + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: + | li TMP1, LJ_TSTR + | settp CRET1, TMP1 + | j ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | ld CARG3, 16(BASE) + | addi TMP0, NARGS8:RC, -16 + | gettp TMP1, CARG1 + | bxltz TMP0, ->fff_fallback + | cleartp STR:CARG1, CARG1 + | li CARG4, -1 + | beqz TMP0, >1 + | sext.w CARG4, CARG3 + | checkint CARG3, ->fff_fallback + |1: + | checkint CARG2, ->fff_fallback + | addi TMP0, TMP1, -LJ_TSTR + | sext.w CARG3, CARG2 + | bxnez TMP0, ->fff_fallback + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | addiw TMP0, CARG2, 1 + | bgez CARG4, >2 + | addw CARG4, CARG4, TMP0 // if (end < 0) end += len+1 + |2: + | bgez CARG3, >3 + | addw CARG3, CARG3, TMP0 // if (start < 0) start += len+1 + |3: + | bgez CARG4, >4 + | mv CARG4, x0 // if (end < 0) end = 0 + |4: + | bgtz CARG3, >5 + | li CARG3, 1 // if (start < 1) start = 1 + |5: + | ble CARG4, CARG2, >6 + | mv CARG4, CARG2 // if (end > len) end = len + |6: + | add CARG2, STR:CARG1, CARG3 + | sub CARG3, CARG4, CARG3 // len = end - start + | addi CARG2, CARG2, sizeof(GCstr)-1 + | addiw CARG3, CARG3, 1 // len += 1 + | bxgez CARG3, ->fff_newstr + |->fff_emptystr: // Return empty string. + | li TMP1, LJ_TSTR + | addi STR:CARG1, GL, offsetof(global_State, strempty) + | settp CARG1, TMP1 + | j ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ld CARG2, 0(BASE) + | bxeqz NARGS8:RC, ->fff_fallback + | checkstr STR:CARG2, ->fff_fallback + | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w + | sd PC, SAVE_PC(sp) + | call_intern ff_string_ .. name, lj_buf_putstr_ .. name + | call_intern ff_string_ .. name, lj_buf_tostr // CARG1 = CRET1 + | ld BASE, L->base + | j ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | fld FARG1, 0(BASE) + | bxeqz TMP1, ->fff_fallback + | fadd.d FARG1, FARG1, TOBIT + | fmv.x.w CRET1, FARG1 + | zext.w CRET1, CRET1 + | ret + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | zext.w CRET1, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |1: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | addi TMP2, BASE, 8 + | add TMP3, BASE, NARGS8:RC + |1: + | ld TMP1, 0(TMP2) + | bxeq TMP2, TMP3, ->fff_resi + | gettp TMP0, TMP1 + | addi TMP2, TMP2, 8 + | bne TMP0, TISNUM, >2 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |2: + | fld FARG1, -8(TMP2) + | sltiu TMP0, TMP0, LJ_TISNUM + | fadd.d FARG1, FARG1, TOBIT + | bxeqz TMP0, ->fff_fallback + | fmv.x.w TMP1, FARG1 + | zext.w TMP1, TMP1 + | bins CRET1, CRET1, TMP1 + | j <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | srliw CARG2, CARG1, 8 + | lui CARG3, 16 + | addiw CARG3, CARG3, -256 + | and CARG2, CARG2, CARG3 + | srliw CARG3, CARG1, 24 + | or CARG2, CARG2, CARG3 + | slli CARG3, CARG1, 8 + | lui CARG4, 0x00ff0 + | and CARG3, CARG3, CARG4 + | slli CARG1, CARG1, 24 + | or CARG1, CARG1, CARG3 + | or CARG1, CARG1, CARG2 + | slli CARG1, CARG1, 32 + | srli CARG1, CARG1, 32 + | j ->fff_resi + | + |.ffunc_bit tobit + |->fff_resi: + | settp CARG1, TISNUM // CARG1 = CRET1 + | j ->fff_restv + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | zext.w CRET1, CRET1 + | j ->fff_resi + | + |.macro .ffunc_bit_sh, name, shins + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bxne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | shins CRET1, CARG1, CARG2 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_sh lshift, sllw + |.ffunc_bit_sh rshift, srlw + |.ffunc_bit_sh arshift, sraw + | + |.macro .ffunc_bit_rot, name, rotinsa, rotinsb + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + | sltiu TMP1, TMP0, LJ_TISNUM + | jal ->vm_tobit_fb + |// mv CARG1, CRET1 // CARG1 = CRET1 + |1: + | gettp TMP0, CARG2 + | zext.w CARG2, CARG2 + | bxne TMP0, TISNUM, ->fff_fallback + | sext.w CARG1, CARG1 + | neg TMP2, CARG2 + | rotinsa TMP1, CARG1, CARG2 + | rotinsb TMP0, CARG1, TMP2 + | or CRET1, TMP0, TMP1 + | zext.w CRET1, CRET1 + | j ->fff_resi + |.endmacro + | + |.ffunc_bit_rot rol, sllw, srlw + |.ffunc_bit_rot ror, srlw, sllw + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | ld CARG3, CFUNC:RB->f + | add TMP1, BASE, NARGS8:RC + | sd BASE, L->base + | addi TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | sd TMP1, L->top + | mv CARG1, L + | bltu TMP2, TMP0, >5 // Need to grow stack. + | jalr CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | slliw RD, CRET1, 3 + | bxgtz CRET1, ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | sub NARGS8:RC, TMP0, BASE + | cleartp LFUNC:RB + | bxnez CRET1, ->vm_call_tail // Returned -1? + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | andi TMP1, PC, ~FRAME_TYPEP // TODO + | bnez TMP0, >3 + | lbu TMP1, OFS_RA(PC) + | slliw TMP1, TMP1, 3 + | addiw TMP1, TMP1, 16 + |3: + | sub TMP2, BASE, TMP1 + | j ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | li CARG2, LUA_MINSTACK + | mv CARG1, L + | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | mv CRET1, x0 // Set zero-flag to force retry. + | j <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | mv MULTRES, ra + | add TMP0, BASE, NARGS8:RC // Calculate L->top. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | mv CARG1, L + | sd TMP0, L->top + | call_intern fff_gc_step, lj_gc_step // (lua_State *L) + | ld BASE, L->base + | mv ra, MULTRES // Help return address predictor. + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | sub NARGS8:RC, TMP0, BASE + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, GL->hookmask + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | beqz TMP1, >1 + |5: // Re-dispatch to static ins. + | ld TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jr TMP1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, GL->hookmask + | lw TMP2, GL->hookcount + | andi TMP1, TMP3, HOOK_ACTIVE // Hook already active? + | bnez TMP1, <5 + | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | addiw TMP2, TMP2, -1 + | beqz TMP1, <5 + | sw TMP2, GL->hookcount + | beqz TMP2, >1 + | andi TMP1, TMP3, LUA_MASKLINE + | beqz TMP1, <5 + |1: + | sw MULTRES, TMPD(sp) + | mv CARG2, PC + | sd BASE, L->base + | mv CARG1, L + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld TMP1, GG_DISP2STATIC(TMP0) + | decode_RA8 RA, INS + | decode_RD8b RD + | jr TMP1 + | + |->cont_hook: // Continue from hook yield. + | addi PC, PC, 4 + | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. + | j <4 + | + | + |->vm_callhook: // Dispatch target for call hooks. + | mv CARG2, PC + | + |->cont_stitch: // Trace stitching. + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mv CARG1, L + | mv CARG2, PC + | sd BASE, L->base + | sw MULTRES, TMPD(sp) + | // (lua_State *L, const BCIns *pc) + | call_intern vm_profhook, lj_dispatch_profile + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addi PC, PC, -4 + | ld BASE, L->base + | j ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + | + |// Hard-float round to integer. + |// Modifies TMP0, FARG1, FARG5 + |.macro vm_round, rm + | fmv.x.d TMP0, FARG1 + | srli TMP0, TMP0, 52 // Extract exponent (and sign). + | andi TMP0, TMP0, 0x7ff // Extract exponent. + | addi TMP0, TMP0, -1075 + | bgtz TMP0, >1 // Less than 2^52 / Not NaN? + | fcvt.l.d TMP0, FARG1, rm + | fcvt.d.l FARG5, TMP0 + | fsgnj.d FRET1, FARG5, FARG1 + |1: + | ret + |.endmacro + | + | + |->vm_floor: + | vm_round rdn + |->vm_ceil: + | vm_round rup + | + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |// void lj_vm_fence_rw_rw() + |->vm_fence_rw_rw: + |.if JIT or FFI + | .long 0x0330000f + | ret + |.endif + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + if (op == BC_ISLT || op == BC_ISGE) { + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + } else { + | ld CARG2, 0(RA) + | ld CARG1, 0(RD) + | gettp CARG3, CARG2 + | gettp CARG4, CARG1 + } + | lhu TMP2, OFS_RD(PC) // TMP2=jump + | addi PC, PC, 4 + | bne CARG3, TISNUM, >2 + | decode_BC4b TMP2 + | bne CARG4, TISNUM, >5 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | slt TMP1, CARG1, CARG2 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (op == BC_ISLT || op == BC_ISGT) { + | neg TMP1, TMP1 + } else { + | addi TMP1, TMP1, -1 + } + | and TMP2, TMP2, TMP1 + |1: + | add PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu TMP1, CARG3, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | sltiu TMP1, CARG4, LJ_TISNUM + | decode_BC4b TMP2 + | beqz TMP1, >4 + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + |3: // RA and RD are both numbers. + | addw TMP2, TMP2, TMP3 + if (op == BC_ISLT) { + | flt.d TMP3, FTMP0, FTMP2 + | neg TMP3, TMP3 + } else if (op == BC_ISGE) { + | flt.d TMP3, FTMP0, FTMP2 + | addi TMP3, TMP3, -1 + } else if (op == BC_ISLE) { + | fle.d TMP3, FTMP2, FTMP0 + | neg TMP3, TMP3 + } else if (op == BC_ISGT) { + | fle.d TMP3, FTMP2, FTMP0 + | addi TMP3, TMP3, -1 + } + | and TMP2, TMP2, TMP3 + | j <1 + | + |4: // RA is a number, RD is not a number. + | // RA is a number, RD is an integer. Convert RD to a number. + | bxne CARG4, TISNUM, ->vmeta_comp + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } else { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } + | j <3 + | + |5: // RA is an integer, RD is not an integer + | sltiu TMP1, CARG4, LJ_TISNUM + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | bxeqz TMP1, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + if (op == BC_ISLT || op == BC_ISGE) { + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + } else { + | fcvt.d.w FTMP2, CARG2 + | fmv.d.x FTMP0, CARG1 + } + | j <3 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | add RA, BASE, RA + | add RD, BASE, RD + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, -4+OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu TMP0, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or TMP0, TMP0, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + | beqz TMP0, ->BC_ISEQN_Z + } else { + | beqz TMP0, ->BC_ISNEN_Z + } + |// Either or both types are not numbers. + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b TMP2 + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + | bne CARG1, CARG2, >2 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | add PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor TMP3, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? TMP0=1 + | beqz TMP3, >3 + | mv TMP0, x0 // TMP0=0: not same type, or same type table/userdata + |3: + | cleartp TAB:TMP1, CARG1 + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // No metatable? + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? + } + | j ->vmeta_equal // Handle __eq metamethod. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | add RA, BASE, RA + | addi PC, PC, 4 + | ld CARG1, 0(RA) + | sub RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + | li TMP0, LJ_TSTR + | decode_BC4b TMP2 + | settp CARG2, TMP0 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D + | addw TMP2, TMP2, TMP3 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | add RA, BASE, RA + | add RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | addi PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | decode_BC4b TMP2 + | bne CARG3, TISNUM, >4 + | addw TMP2, TMP2, TMP3 + | bne CARG4, TISNUM, >6 + | xor TMP0, CARG1, CARG2 // TMP0=0: A==D; TMP0!=0: A!=D + |1: + if (vk) { + | seqz TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + |2: + } else { + | snez TMP4, TMP0 + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + |2: + | add PC, PC, TMP2 + } + |3: + | ins_next + | + |4: // RA is not an integer. + | addw TMP2, TMP2, TMP3 + | bgeu CARG3, TISNUM, <2 + | fmv.d.x FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | bne CARG4, TISNUM, >5 + |// RA is a number, RD is an integer. + | fcvt.d.w FTMP2, CARG2 + | + |5: // RA and RD are both numbers. + | feq.d TMP0, FTMP0, FTMP2 + | seqz TMP0, TMP0 + | j <1 + | + |6: // RA is an integer, RD is a number. + | bgeu CARG4, TISNUM, <2 + | fcvt.d.w FTMP0, CARG1 + | fmv.d.x FTMP2, CARG2 + | j <5 + | + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | add RA, BASE, RA + | srliw TMP0, RD, 3 + | ld TMP1, 0(RA) + | not TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 + | lhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target + | gettp TMP1, TMP1 + | addi PC, PC, 4 + | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 + if (vk) { + | seqz TMP4, TMP0 + } else { + | snez TMP4, TMP0 + } + | neg TMP4, TMP4 + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | add RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | addi PC, PC, 4 + | gettp TMP0, TMP0 + | add RA, BASE, RA + | sltiu TMP0, TMP0, LJ_TISTRUECOND // TMP0=1 true; TMP0=0 false + | decode_BC4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | ld CRET1, 0(RD) + | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 + if (op == BC_IST || op == BC_ISTC) { + | beqz TMP0, >1 + if (op == BC_ISTC) { + | sd CRET1, 0(RA) + } + } else { + | bnez TMP0, >1 + if (op == BC_ISFC) { + | sd CRET1, 0(RA) + } + } + | add PC, PC, TMP2 + |1: + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | add TMP0, BASE, RA + | srliw TMP1, RD, 3 + | ld TMP0, 0(TMP0) + | gettp TMP0, TMP0 + | add TMP0, TMP0, TMP1 // if itype of RA == type, then TMP0=0 + | bxnez TMP0, ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | add TMP0, BASE, RA + | ld TMP0, 0(TMP0) + | checknum TMP0, ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | add RD, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | li TMP1, LJ_TTRUE + | ins_next1 + | gettp TMP0, TMP0 + | sltu TMP0, TMP1, TMP0 + | addiw TMP0, TMP0, 1 + | slli TMP0, TMP0, 47 + | not TMP0, TMP0 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | add RB, BASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RB) + | lui TMP1, 0x80000 + | gettp CARG3, TMP0 + | bne CARG3, TISNUM, >1 + | negw TMP0, TMP0 + | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + | zext.w TMP0, TMP0 + | settp_b TMP0, TISNUM + | j >2 + |1: + | sltiu TMP3, CARG3, LJ_TISNUM + | slli TMP1, TMP1, 32 + | bxeqz TMP3, ->vmeta_unm + | xor TMP0, TMP0, TMP1 // sign => ~sign + |2: + | sd TMP0, 0(RA) + | ins_next + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | add CARG2, BASE, RD + | ld TMP0, 0(CARG2) + | add RA, BASE, RA + | gettp TMP1, TMP0 + | addi TMP2, TMP1, -LJ_TSTR + | cleartp STR:CARG1, TMP0 + | bnez TMP2, >2 + | lwu CARG1, STR:CARG1->len + |1: + | settp_b CARG1, TISNUM + | sd CARG1, 0(RA) + | ins_next + |2: + | addi TMP2, TMP1, -LJ_TTAB + | bxnez TMP2, ->vmeta_len +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |3: +#endif + |->BC_LEN_Z: + | call_intern BC_LEN, lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | j <1 +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | fdiv.d FARG1, b, c + | jal ->vm_floor // floor(b/c) + | fmul.d a, FRET1, c + | fsub.d a, b, a // b - floor(b/c)*c + |.endmacro + | + |.macro ins_arithpre + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||if (vk == 1) { + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | decode_RB8 RC, INS + | decode_RDtoRC8 RB, RD + ||} else { + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + ||} + ||switch (vk) { + ||case 0: // suffix is VN + | add RB, BASE, RB + | add RC, KBASE, RC + || break; + ||case 1: // suffix is NV + | add RC, BASE, RC + | add RB, KBASE, RB + || break; + ||default: // CAT or suffix is VV + | add RB, BASE, RB + | add RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arithfp, fpins, itype1, itype2 + | fld FTMP0, 0(RB) + | sltu itype1, itype1, TISNUM + | sltu itype2, itype2, TISNUM + | fld FTMP2, 0(RC) + | and itype1, itype1, itype2 + | add RA, BASE, RA + | bxeqz itype1, ->vmeta_arith + | fpins FRET1, FTMP0, FTMP2 + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + |.endmacro + | + |.macro ins_arithead, itype1, itype2, tval1, tval2 + | ld tval1, 0(RB) + | ld tval2, 0(RC) + | // Check for two integers. + | gettp itype1, tval1 + | gettp itype2, tval2 + |.endmacro + | + |.macro ins_arithdn, intins, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG3, CARG1 + | sext.w CARG4, CARG2 + |.if "intins" == "addw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "subw" + | intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | add RA, BASE, RA + | bxltz TMP1, ->vmeta_arith + |.elif "intins" == "mulw" + | mul TMP2, CARG3, CARG4 + | add RA, BASE, RA + | sext.w CRET1, TMP2 + | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. + |.endif + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithdiv, fpins + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | + |.macro ins_arithmod, fpins, BC + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 + | bne TMP1, TISNUM, >1 + | sext.w CARG1, CARG1 + | sext.w CARG2, CARG2 + | add RA, BASE, RA + | bxeqz CARG2, ->vmeta_arith + | call_intern BC, lj_vm_modi + | zext.w CRET1, CRET1 + | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn addw, fadd.d + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subw, fsub.d + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn mulw, fmul.d + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdiv.d + break; + case BC_MODVN: + | ins_arithmod fpmod, BC_MODVN + break; + case BC_MODNV: + | ins_arithmod fpmod, BC_MODNV + break; + case BC_MODVV: + | ins_arithmod fpmod, BC_MODVV + break; + case BC_POW: + | ins_arithpre + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | add RA, BASE, RA + | bxeqz TMP0, ->vmeta_arith + | fld FARG1, 0(RB) + | fld FARG2, 0(RC) + | call_extern BC_POW, pow + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | sub CARG3, RC, RB + | sd BASE, L->base + | add CARG2, BASE, RC + | mv MULTRES, RB + |->BC_CAT_Z: + | srliw CARG3, CARG3, 3 + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ld BASE, L->base + | bxnez CRET1, ->vmeta_binop + | add RB, BASE, MULTRES + | ld TMP0, 0(RB) + | add RA, BASE, RA + | sd TMP0, 0(RA) + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | sub TMP1, KBASE, RD + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + break; + case BC_KCDATA: + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sraiw RD, INS, 16 + | add RA, BASE, RA + | zext.w RD, RD + | ins_next1 + | settp_b RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | add RD, KBASE, RD + | add RA, BASE, RA + | ld TMP0, 0(RD) + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | add RA, BASE, RA + | slli TMP0, RD, 44 // 44+3 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | add RA, BASE, RA + | sd TISNIL, 0(RA) + | addi RA, RA, 8 + | add RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt TMP0, RA, RD + | addi RA, RA, 8 + | bnez TMP0, <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RA, BASE, RA + | cleartp LFUNC:TMP0 + | add RD, RD, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RD->uvptr + | ld TMP1, UPVAL:TMP0->v + | ld TMP2, 0(TMP1) + | ins_next1 + | sd TMP2, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, BASE, RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:TMP0->closed + | gettp TMP2, CRET1 + | sd CRET1, 0(CARG2) + | or TMP3, TMP3, TMP0 + | li TMP0, LJ_GC_BLACK|1 + | addi TMP2, TMP2, -(LJ_TNUMX+1) + | beq TMP3, TMP0, >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu TMP0, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | cleartp GCOBJ:CRET1, CRET1 + | beqz TMP0, <1 // tvisgcv(v) + | lbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL + | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | cleartp LFUNC:TMP0 + | add RA, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:TMP0->marked + | ld CARG2, UPVAL:TMP0->v + | lbu TMP3, STR:TMP1->marked + | andi TMP4, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:TMP0->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | sd TMP1, 0(CARG2) + | bnez TMP4, >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + | andi TMP0, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL + | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | add RD, KBASE, RD + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, 0(RD) + | ld TMP0, UPVAL:TMP0->v + | sd TMP1, 0(TMP0) + | ins_next + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | slli TMP2, RD, 44 + | cleartp LFUNC:TMP0 + | add TMP0, RA, LFUNC:TMP0 + | not TMP2, TMP2 + | ld UPVAL:TMP0, LFUNC:TMP0->uvptr + | ld TMP1, UPVAL:TMP0->v + | sd TMP2, 0(TMP1) + | ins_next + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | sd BASE, L->base + | mv CARG1, L + | beqz TMP2, >1 + | add CARG2, BASE, RA + | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | sub TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | cleartp CARG3 + | mv CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern BC_FNEW, lj_func_newL_gc + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, GL->gc.total + | ld TMP1, GL->gc.threshold + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | bgeu TMP0, TMP1, >5 + |1: + if (op == BC_TNEW) { + | srliw CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | lzi TMP0, 0x801 + | addiw TMP2, CARG2, -0x7ff + | srliw CARG3, RD, 14 + | seqz TMP3, TMP2 + | neg TMP4, TMP3 + | xor CARG1, TMP0, CARG2 // CARG2 = TMP3 ? TMP0 : CARG2 + | and CARG1, CARG1, TMP4 + | xor CARG2, CARG2, CARG1 + | mv CARG1, L + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern BC_TNEW, lj_tab_new + | // Returns Table *. + } else { + | sub TMP1, KBASE, RD + | mv CARG1, L + | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) + | // Returns Table *. + } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | add RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + |5: + | mv MULTRES, RD + | mv CARG1, L + if (op == BC_TNEW) { + | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) + } else { + | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) + } + | mv RD, MULTRES + | j <1 + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP0, FRAME_FUNC(BASE) + | sub TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP0 + | ld TAB:RB, LFUNC:TMP0->env + | add RA, BASE, RA + if (op == BC_GGET) { + | j ->BC_TGETS_Z + } else { + | j ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | lw TMP0, TAB:RB->asize + | bne TMP3, TISNUM, >5 // Integer key? + | sext.w TMP2, TMP2 + | ld TMP1, TAB:RB->array + | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? + | slliw TMP2, TMP2, 3 + | add TMP2, TMP1, TMP2 + | ld CRET1, 0(TMP2) + | beq CRET1, TISNIL, >2 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tgetv + | + |5: + | li TMP0, LJ_TSTR + | cleartp RC, TMP2 + | bxne TMP3, TMP0, ->vmeta_tgetv // String key? + | j ->BC_TGETS_Z + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | ld TAB:TMP3, TAB:RB->metatable + | bne CARG1, RC, >4 + | beq CARG2, TISNIL, >5 // Key found, but nil value? + |3: + | sd CARG2, 0(RA) + | ins_next + | + |4: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | mv CARG2, TISNIL + | beqz TAB:TMP3, <3 // No metatable: done. + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<vmeta_tgets + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | add CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | add RA, BASE, RA + | srliw TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tgetb + | add RC, TMP2, RC + | ld CRET1, 0(RC) + | beq CRET1, TISNIL, >5 + |1: + | sd CRET1, 0(RA) + | ins_next + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add RB, BASE, RB + | add RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, 0(RC) + | add RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | bxgeu CARG2, TMP0, ->vmeta_tgetr // In array part? + | slliw TMP2, CARG2, 3 + | add TMP3, TMP1, TMP2 + | ld TMP1, 0(TMP3) + |->BC_TGETR_Z: + | ins_next1 + | sd TMP1, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | add RA, BASE, RA + | checktab TAB:RB, ->vmeta_tsetv + | sext.w RC, TMP2 + | checkint TMP2, >5 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | bxgeu RC, TMP0, ->vmeta_tsetv // Integer key and in array part? + | slliw TMP2, RC, 3 + | add TMP1, TMP1, TMP2 + | lbu TMP3, TAB:RB->marked + | ld TMP0, 0(TMP1) + | ld CRET1, 0(RA) + | beq TMP0, TISNIL, >3 + |1: + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(TMP1) + | bnez TMP2, >7 + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<vmeta_tsetv + |5: + | gettp TMP0, TMP2 + | addi TMP0, TMP0, -LJ_TSTR + | bxnez TMP0, ->vmeta_tsetv + | cleartp STR:RC, TMP2 + | j ->BC_TSETS_Z // String key? + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | sub CARG3, KBASE, RC + | ld TAB:RB, 0(CARG2) + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | add RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | sb x0, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | slliw TMP0, TMP1, 5 + | slliw TMP1, TMP1, 3 + | subw TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + | fld FTMP0, 0(RA) + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | lbu TMP3, TAB:RB->marked + | bne TMP0, RC, >5 + | ld TAB:TMP0, TAB:RB->metatable + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |2: + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(table) + | fsd FTMP0, NODE:TMP2->val + | bnez TMP3, >7 + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<vmeta_tsets + | + |5: // Follow hash chain. + | mv NODE:TMP2, NODE:TMP1 + | bnez NODE:TMP1, <1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | addi CARG3, GL, offsetof(global_State, tmptv) + | beqz TAB:TMP2, >6 // No metatable: continue. + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | sd RC, 0(CARG3) + | sd BASE, L->base + | mv CARG2, TAB:RB + | sd PC, SAVE_PC(sp) + | mv CARG1, L + | // (lua_State *L, GCtab *t, TValue *k) + | call_intern BC_TSETS, lj_tab_newkey + | // Returns TValue *. + | ld BASE, L->base + | fsd FTMP0, 0(CRET1) + | j <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG2, BASE, RB + | add RA, BASE, RA + | ld TAB:RB, 0(CARG2) + | srliw TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | bxgeu TMP0, TMP1, ->vmeta_tsetb + | add RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + | ld CRET1, 0(RA) + | andi TMP1, TMP3, LJ_GC_BLACK // isblack(table) + | sd CRET1, 0(RC) + | bnez TMP1, >7 + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8 RB, INS + | decode_RDtoRC8 RC, RD + | add CARG1, BASE, RB + | add CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, 0(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | add RA, BASE, RA + | bnez TMP2, >7 + |2: + | bxgeu CARG3, TMP0, ->vmeta_tsetr // In array part? + | slliw TMP2, CARG3, 3 + | add CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld TMP1, 0(RA) + | ins_next1 + | sd TMP1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | add RA, BASE, RA + |1: + | add TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiw TMP0, MULTRES, -8 + | lw TMP3, 0(TMP3) // Integer constant is in lo-word. + | srliw CARG3, TMP0, 3 + | beqz TMP0, >4 // Nothing to copy? + | cleartp TAB:CARG2 + | addw CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | slliw TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | bltu TMP2, CARG3, >5 + | add TMP2, RA, TMP0 + | add TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | sd CRET1, 0(TMP1) + | addi TMP1, TMP1, 8 + | bltu RA, TMP2, <3 + | bnez TMP0, >7 + |4: + | ins_next + | + |5: // Need to resize array part. + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv BASE, RD + | mv CARG1, L + | // (lua_State *L, GCtab *t, int nasize) + | call_intern BC_TSETM, lj_tab_reasize + | // Must not reallocate the stack. + | mv RD, BASE + | ld BASE, L->base // Reload BASE for lack of a saved register. + | j <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | addw NARGS8:RC, NARGS8:RC, MULTRES + | j ->BC_CALL_Z + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | mv TMP2, BASE + | add BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | addi BASE, BASE, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addw NARGS8:RD, NARGS8:RD, MULTRES + | j ->BC_CALLT_Z1 + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT_Z1: + | add RA, BASE, RA + | ld LFUNC:RB, 0(RA) + | mv NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | addi RA, RA, 16 + | addiw NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | xori TMP2, TMP1, FRAME_VARG + | bnez TMP0, >7 + |1: + | sd LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu CARG4, TMP3, 2 // (> FF_C) Calling a fast function? + | mv TMP2, BASE + | mv RB, CARG3 + | mv TMP3, NARGS8:RC + | beqz NARGS8:RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addiw TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez TMP3, <2 + |3: + | or TMP0, TMP0, CARG4 + | beqz TMP0, >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8 RA, INS + | sub TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | j <4 + | + |7: // Tailcall from a vararg function. + | andi CARG4, TMP2, FRAME_TYPEP + | sub TMP2, BASE, TMP2 // Relocate BASE down. + | bnez CARG4, <1 // Vararg frame below? + | mv BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | andi TMP0, TMP1, FRAME_TYPE + | j <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | mv TMP2, BASE // Save old BASE for vmeta_call. + | add BASE, BASE, RA + | ld RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | addi BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |->vm_IITERN: + | add RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8(RA) // Get index from control var. + | cleartp TAB:RB + | addi PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | slli CARG3, TISNUM, 47 + |1: // Traverse array part. + | bleu TMP0, RC, >5 // Index points after array part? + | slliw TMP3, RC, 3 + | add TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | or TMP2, RC, CARG3 + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | decode_BC4b RD + | add RD, RD, TMP3 + | sw RC, -8(RA) // Update control var. + | add PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subw RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | bltu TMP1, RC, <3 // End of iteration? Branch to ITERL+1. + | slliw TMP3, RC, 5 + | slliw RB, RC, 3 + | subw TMP3, TMP3, RB + | add NODE:TMP3, TMP3, TMP2 // node = tab->node + (idx*32-idx*8) + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) // ITERL RD + | addiw RC, RC, 1 + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | sd CARG1, 8(RA) + | addw RC, RC, TMP0 + | decode_BC4b RD + | addw RD, RD, TMP3 + | sd CARG2, 0(RA) + | add PC, PC, RD + | sw RC, -8(RA) // Update control var. + | j <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | add RA, BASE, RA + | srliw TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | add TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | addi CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | addi CARG3, CARG3, -LJ_TNIL + | or TMP3, CARG2, CARG3 + | addi TMP1, TMP1, -FF_next_N + | or TMP3, TMP3, TMP1 + | lui TMP1, ((LJ_KEYINDEX - (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800)) >> 12) & 0xfffff + | bnez TMP3, >5 + | add PC, TMP0, TMP2 + | addi TMP1, TMP1, (((LJ_KEYINDEX & 0xfff)^0x800) - 0x800) + | slli TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | add PC, TMP0, TMP2 + | sb TMP1, OFS_OP(PC) + | j <1 + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8 RB, INS + | add RC, BASE, RC + | add RA, BASE, RA + | addi RC, RC, FRAME_VARG + | add TMP2, RA, RB + | addi TMP3, BASE, -16 // TMP3 = vtop + | sub RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | sub TMP1, TMP3, RC + | beqz RB, >5 // Copy all varargs? + | addi TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu TMP0, RC, TMP3 + | addi RC, RC, 8 + | bnez TMP0, >2 + | mv CARG1, TISNIL + |2: + | sd CARG1, 0(RA) + | sltu TMP0, RA, TMP2 + | addi RA, RA, 8 + | bnez TMP0, <1 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | li MULTRES, 8 // MULTRES = (0+1)*8 + | blez TMP1, <3 // No vararg slots? + | add TMP2, RA, TMP1 + | addi MULTRES, TMP1, 8 + | bltu TMP0, TMP2, >7 + |6: + | ld CRET1, 0(RC) + | addi RC, RC, 8 + | sd CRET1, 0(RA) + | addi RA, RA, 8 + | bltu RC, TMP3, <6 // More vararg slots? + | j <3 + | + |7: // Grow stack for varargs. + | sd RA, L->top + | sub RA, RA, BASE + | sd BASE, L->base + | sub BASE, RC, BASE // Need delta, because BASE may change. + | sd PC, SAVE_PC(sp) + | srliw CARG2, TMP1, 3 + | mv CARG1, L + | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) + | mv RC, BASE + | ld BASE, L->base + | add RA, BASE, RA + | add RC, BASE, RC + | addi TMP3, BASE, -16 + | j <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addw RD, RD, MULTRES + | j ->BC_RET_Z1 + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + |->BC_RET_Z1: + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + | addi RC, RD, -8 + | decode_RA8 TMP0, INS + | decode_RB8 RB, INS + | sub BASE, TMP2, TMP0 + | add TMP3, TMP2, RB + | beqz RC, >3 + |2: + | ld CRET1, 0(RA) + | addi RA, RA, 8 + | addi RC, RC, -8 + | sd CRET1, 0(TMP2) + | addi TMP2, TMP2, 8 + | bnez RC, <2 + |3: + | addi TMP3, TMP3, -8 + |5: + | bltu TMP2, TMP3, >6 + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | addi TMP2, TMP2, 8 + | j <5 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bxnez TMP2, ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, TMP1 + | ld PC, FRAME_PC(BASE) + | j <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | add RA, BASE, RA + | mv MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | xori TMP1, PC, FRAME_VARG + | bnez TMP0, ->BC_RETV_Z + | lw INS, -4(PC) + | addi TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8 RB, INS + | decode_RA8 RA, INS + | sub BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | bltu RD, RB, >6 + | ld TMP1, FRAME_FUNC(BASE) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ins_next1 + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | addi TMP2, TMP2, 8 + | addi RD, RD, 8 + if (op == BC_RET1) { + | sd TISNIL, 0(TMP2) + } else { + | sd TISNIL, -8(TMP2) + } + | j <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | add RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // CARG1 = IDX + | ld CARG2, FORL_STEP*8(RA) // CARG2 = STEP + | ld CARG3, FORL_STOP*8(RA) // CARG3 = STOP + | gettp CARG4, CARG1 + | gettp CARG5, CARG2 + | gettp CARG6, CARG3 + if (op != BC_JFORL) { + | srliw RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J<<2 + | add TMP2, RD, TMP2 + } + | bne CARG4, TISNUM, >3 + | sext.w CARG4, CARG1 // start + | sext.w CARG3, CARG3 // stop + if (!vk) { // init + | bxne CARG6, TISNUM, ->vmeta_for + | bxne CARG5, TISNUM, ->vmeta_for + | bfextri TMP0, CARG2, 31, 31 // sign + | slt CARG2, CARG3, CARG4 + | slt TMP1, CARG4, CARG3 + | neg TMP4, TMP0 + | xor TMP0, TMP1, CARG2 // CARG2 = TMP0 ? TMP1 : CARG2 + | and TMP0, TMP0, TMP4 + | xor CARG2, CARG2, TMP0 // CARG2=0: +,start <= stop or -,start >= stop + } else { + | sext.w CARG5, CARG2 // step + | addw CARG1, CARG4, CARG5 // start + step + | xor TMP3, CARG1, CARG4 // y^a + | xor TMP1, CARG1, CARG5 // y^b + | and TMP3, TMP3, TMP1 + | slt TMP1, CARG1, CARG3 // start+step < stop ? + | slt CARG3, CARG3, CARG1 // stop < start+step ? + | sltz TMP0, CARG5 // step < 0 ? + | sltz TMP3, TMP3 // ((y^a) & (y^b)) < 0: overflow. + | neg TMP4, TMP0 + | xor TMP1, TMP1, CARG3 // CARG3 = TMP0 ? TMP1 : CARG3 + | and TMP1, TMP1, TMP4 + | xor CARG3, CARG3, TMP1 + | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue + | zext.w CARG1, CARG1 + | settp_b CARG1, TISNUM + | sd CARG1, FORL_IDX*8(RA) + } + |1: + if (op == BC_FORI) { + | neg TMP4, CARG2 // CARG2!=0: jump out the loop; CARG2==0: next INS + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } else if (op == BC_JFORI) { + | add PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | addi TMP4, CARG2, -1 // CARG2!=0: next INS; CARG2==0: jump back + | and TMP2, TMP2, TMP4 + | add PC, PC, TMP2 + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | decode_RD8b RD + | beqz CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop + } else if (op == BC_JFORL) { + | beqz CARG2, =>BC_JLOOP + } + | ins_next2 + | + |3: // FP loop. + | fld FTMP0, FORL_IDX*8(RA) // start + | fld FTMP1, FORL_STOP*8(RA) // stop + | ld TMP0, FORL_STEP*8(RA) // step + | sltz CARG2, TMP0 // step < 0 ? + | neg CARG2, CARG2 + if (!vk) { + | sltiu TMP3, CARG4, LJ_TISNUM // start is number ? + | sltiu TMP0, CARG5, LJ_TISNUM // step is number ? + | sltiu TMP1, CARG6, LJ_TISNUM // stop is number ? + | and TMP3, TMP3, TMP1 + | and TMP0, TMP0, TMP3 + | bxeqz TMP0, ->vmeta_for // if start or step or stop isn't number + | flt.d TMP3, FTMP0, FTMP1 // start < stop ? + | flt.d TMP4, FTMP1, FTMP0 // stop < start ? + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 // CARG2=0:+,startstop + | j <1 + } else { + | fld FTMP3, FORL_STEP*8(RA) + | fadd.d FTMP0, FTMP0, FTMP3 // start + step + | flt.d TMP3, FTMP0, FTMP1 // start + step < stop ? + | flt.d TMP4, FTMP1, FTMP0 + | xor TMP0, TMP3, TMP4 // CARG2 = CARG2 ? TMP3 : TMP4 + | and TMP0, TMP0, CARG2 + | xor CARG2, TMP4, TMP0 + if (op == BC_IFORL) { + | addi TMP3, CARG2, -1 + | and TMP2, TMP2, TMP3 + | add PC, PC, TMP2 + } + | fsd FTMP0, FORL_IDX*8(RA) + | ins_next1 + | fsd FTMP0, FORL_EXT*8(RA) + | j <2 + } + break; + + case BC_ITERL: + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | add RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | sd TMP1,-8(RA) + | j =>BC_JLOOP + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD // PC + (jump - 0x8000)<<2 + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | bxltu TMP2, RA, ->vm_growstack_l + | slliw TMP1, TMP1, 3 // numparams*8 + |2: + | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. + if (op == BC_JFUNCF) { + | decode_RD8 RD, INS + | j =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | add TMP0, BASE, NARGS8:RC + | sd TISNIL, 0(TMP0) + | addiw NARGS8:RC, NARGS8:RC, 8 + | j <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC + | add TMP1, BASE, RC + | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 + | add TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | addi TMP2, TMP2, -8 + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. + | bxgeu TMP0, TMP2, ->vm_growstack_l + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | mv RA, BASE + | mv RC, TMP1 + | ins_next1 + | addi BASE, TMP1, 16 + | beqz TMP2, >2 + |1: + | ld TMP0, 0(RA) + | sltu CARG2, RA, RC // Less args than parameters? + | addi RA, RA, 8 + | addi TMP1, TMP1, 8 + | addiw TMP2, TMP2, -1 + | beqz CARG2, >3 + | neg TMP4, CARG2 // Clear old fixarg slot (help the GC). + | xor TMP3, TISNIL, TMP0 // CARG1 = CARG2 ? TISNIL : TMP0 + | and TMP3, TMP3, TMP4 + | xor CARG1, TMP0, TMP3 + | sd CARG1, -8(RA) + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + |2: + | ins_next2 + |3: + | neg TMP4, CARG2 // Clear missing fixargs. + | xor TMP3, TMP0, TISNIL // TMP0 = CARG2 ? TMP0 : TISNIL + | and TMP3, TMP3, TMP4 + | xor TMP0, TISNIL, TMP3 + | sd TMP0, 8(TMP1) + | bnez TMP2, <1 + | j <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CARG4, CFUNC:RB->f + } else { + | ld CARG4, GL->wrapf + } + | add TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | add RC, BASE, NARGS8:RC + | sd BASE, L->base // base of currently excuting function + | sd RC, L->top + | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. + | li_vmstate C // li TMP0, ~LJ_VMST_C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | mv CARG1, L + | st_vmstate // sw TMP0, GL->vmstate + | jalr CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. + | ld BASE, L->base + | ld TMP1, L->top + | sd L, GL->cur_L + | slliw RD, CRET1, 3 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. + | sub RA, TMP1, RD // RA = L->top - nresults*8 + | st_vmstate + | j ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + +} From eb87e6fd6baef71bcfcdce4a3f5740e3a29e038e Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:19:25 +0800 Subject: [PATCH 07/22] riscv(support): add target definition --- src/lj_target.h | 4 +- src/lj_target_riscv.h | 542 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 545 insertions(+), 1 deletion(-) create mode 100644 src/lj_target_riscv.h diff --git a/src/lj_target.h b/src/lj_target.h index d00554d4b..a79f5d6a0 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 typedef uint64_t RegSet; #define RSET_BITS 6 #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) @@ -145,6 +145,8 @@ typedef uint32_t RegCost; #include "lj_target_mips.h" #elif LJ_TARGET_S390X #include "lj_target_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "lj_target_riscv.h" #else #error "Missing include for target CPU" #endif diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h new file mode 100644 index 000000000..22948dc5a --- /dev/null +++ b/src/lj_target_riscv.h @@ -0,0 +1,542 @@ +/* +** Definitions for RISC-V CPUs. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_RISCV_H +#define _LJ_TARGET_RISCV_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(X0) _(RA) _(SP) _(X3) _(X4) _(X5) _(X6) _(X7) \ + _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ + _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ + _(X24) _(X25) _(X26) _(X27) _(X28) _(X29) _(X30) _(X31) +#define FPRDEF(_) \ + _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ + _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ + _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ + _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) +#define VRIDDEF(_) + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_ZERO = RID_X0, + RID_TMP = RID_RA, + RID_GP = RID_X3, + RID_TP = RID_X4, + + /* Calling conventions. */ + RID_RET = RID_X10, + RID_RETLO = RID_X10, + RID_RETHI = RID_X11, + RID_FPRET = RID_F10, + RID_CFUNCADDR = RID_X5, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_X18, /* Interpreter BASE. */ + RID_LPC = RID_X20, /* Interpreter PC. */ + RID_GL = RID_X21, /* Interpreter GL. */ + RID_LREG = RID_X23, /* Interpreter L. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_X0, + RID_MAX_GPR = RID_X31+1, + RID_MIN_FPR = RID_MAX_GPR, + RID_MAX_FPR = RID_F31+1, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ +}; + +#define RID_NUM_KREF RID_NUM_GPR +#define RID_MIN_KREF RID_X0 + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except ZERO, TMP, SP, GP, TP, CFUNCADDR and GL. */ +#define RSET_FIXED \ + (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ + RID2RSET(RID_GP)|RID2RSET(RID_TP)|RID2RSET(RID_GL)) +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) + +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL + +#define RSET_SCRATCH_GPR \ + (RSET_RANGE(RID_X5, RID_X7+1)|RSET_RANGE(RID_X28, RID_X31+1)|\ + RSET_RANGE(RID_X10, RID_X17+1)) + +#define RSET_SCRATCH_FPR \ + (RSET_RANGE(RID_F0, RID_F7+1)|RSET_RANGE(RID_F10, RID_F17+1)|\ + RSET_RANGE(RID_F28, RID_F31+1)) +#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) + +#define REGARG_FIRSTGPR RID_X10 +#define REGARG_LASTGPR RID_X17 +#define REGARG_NUMGPR 8 + +#define REGARG_FIRSTFPR RID_F10 +#define REGARG_LASTFPR RID_F17 +#define REGARG_NUMFPR 8 + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. +*/ +#if LJ_32 +#define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif +#define SPS_FIRST 4 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) + +/* -- Exit state ---------------------------------------------------------- */ +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) +{ + while (*p == 0x00000013) p++; /* Skip RISCVI_NOP. */ + return p + 4 + exitno; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) + +/* -- Instructions -------------------------------------------------------- */ + +/* Instruction fields. */ +#define RISCVF_D(d) (((d)&31) << 7) +#define RISCVF_S1(r) (((r)&31) << 15) +#define RISCVF_S2(r) (((r)&31) << 20) +#define RISCVF_S3(r) (((r)&31) << 27) +#define RISCVF_FUNCT2(f) (((f)&3) << 25) +#define RISCVF_FUNCT3(f) (((f)&7) << 12) +#define RISCVF_FUNCT7(f) (((f)&127) << 25) +#define RISCVF_SHAMT(s) ((s) << 20) +#define RISCVF_RM(m) (((m)&7) << 12) +#define RISCVF_IMMI(i) ((i) << 20) +#define RISCVF_IMMS(i) (((i)&0xfe0) << 20 | ((i)&0x1f) << 7) +#define RISCVF_IMMB(i) (((i)&0x1000) << 19 | ((i)&0x800) >> 4 | ((i)&0x7e0) << 20 | ((i)&0x1e) << 7) +#define RISCVF_IMMU(i) (((i)&0xfffff) << 12) +#define RISCVF_IMMJ(i) (((i)&0x100000) << 11 | ((i)&0xff000) | ((i)&0x800) << 9 | ((i)&0x7fe) << 20) + +/* Encode helpers. */ +#define RISCVF_W_HI(w) ((w) - ((((w)&0xfff)^0x800) - 0x800)) +#define RISCVF_W_LO(w) ((w)&0xfff) +#define RISCVF_HI(i) ((RISCVF_W_HI(i) >> 12) & 0xfffff) +#define RISCVF_LO(i) RISCVF_W_LO(i) + +/* Check for valid field range. */ +#define RISCVF_SIMM_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) +#define RISCVF_UIMM_OK(x, b) (((x) >> (b)) == 0) +#define checku11(i) RISCVF_UIMM_OK(i, 11) +#define checki12(i) RISCVF_SIMM_OK(i, 12) +#define checki13(i) RISCVF_SIMM_OK(i, 13) +#define checki20(i) RISCVF_SIMM_OK(i, 20) +#define checki21(i) RISCVF_SIMM_OK(i, 21) +#define checki32auipc(i) (checki32(i) && (int32_t)(i) < 0x7ffff800) + +typedef enum RISCVIns { + + /* --- RVI --- */ + RISCVI_LUI = 0x00000037, + RISCVI_AUIPC = 0x00000017, + + RISCVI_JAL = 0x0000006f, + RISCVI_JALR = 0x00000067, + + RISCVI_ADDI = 0x00000013, + RISCVI_SLTI = 0x00002013, + RISCVI_SLTIU = 0x00003013, + RISCVI_XORI = 0x00004013, + RISCVI_ORI = 0x00006013, + RISCVI_ANDI = 0x00007013, + + RISCVI_SLLI = 0x00001013, + RISCVI_SRLI = 0x00005013, + RISCVI_SRAI = 0x40005013, + + RISCVI_ADD = 0x00000033, + RISCVI_SUB = 0x40000033, + RISCVI_SLL = 0x00001033, + RISCVI_SLT = 0x00002033, + RISCVI_SLTU = 0x00003033, + RISCVI_XOR = 0x00004033, + RISCVI_SRL = 0x00005033, + RISCVI_SRA = 0x40005033, + RISCVI_OR = 0x00006033, + RISCVI_AND = 0x00007033, + + RISCVI_LB = 0x00000003, + RISCVI_LH = 0x00001003, + RISCVI_LW = 0x00002003, + RISCVI_LBU = 0x00004003, + RISCVI_LHU = 0x00005003, + RISCVI_SB = 0x00000023, + RISCVI_SH = 0x00001023, + RISCVI_SW = 0x00002023, + + RISCVI_BEQ = 0x00000063, + RISCVI_BNE = 0x00001063, + RISCVI_BLT = 0x00004063, + RISCVI_BGE = 0x00005063, + RISCVI_BLTU = 0x00006063, + RISCVI_BGEU = 0x00007063, + + RISCVI_ECALL = 0x00000073, + RISCVI_EBREAK = 0x00100073, + + RISCVI_NOP = 0x00000013, + RISCVI_MV = 0x00000013, + RISCVI_NOT = 0xfff04013, + RISCVI_NEG = 0x40000033, + RISCVI_RET = 0x00008067, + RISCVI_ZEXT_B = 0x0ff07013, + +#if LJ_TARGET_RISCV64 + RISCVI_LWU = 0x00007003, + RISCVI_LD = 0x00003003, + RISCVI_SD = 0x00003023, + + RISCVI_ADDIW = 0x0000001b, + + RISCVI_SLLIW = 0x0000101b, + RISCVI_SRLIW = 0x0000501b, + RISCVI_SRAIW = 0x4000501b, + + RISCVI_ADDW = 0x0000003b, + RISCVI_SUBW = 0x4000003b, + RISCVI_SLLW = 0x0000103b, + RISCVI_SRLW = 0x0000503b, + RISCVI_SRAW = 0x4000503b, + + RISCVI_NEGW = 0x4000003b, + RISCVI_SEXT_W = 0x0000001b, +#endif + + /* --- RVM --- */ + RISCVI_MUL = 0x02000033, + RISCVI_MULH = 0x02001033, + RISCVI_MULHSU = 0x02002033, + RISCVI_MULHU = 0x02003033, + RISCVI_DIV = 0x02004033, + RISCVI_DIVU = 0x02005033, + RISCVI_REM = 0x02006033, + RISCVI_REMU = 0x02007033, +#if LJ_TARGET_RISCV64 + RISCVI_MULW = 0x0200003b, + RISCVI_DIVW = 0x0200403b, + RISCVI_DIVUW = 0x0200503b, + RISCVI_REMW = 0x0200603b, + RISCVI_REMUW = 0x0200703b, +#endif + + /* --- RVF --- */ + RISCVI_FLW = 0x00002007, + RISCVI_FSW = 0x00002027, + + RISCVI_FMADD_S = 0x00000043, + RISCVI_FMSUB_S = 0x00000047, + RISCVI_FNMSUB_S = 0x0000004b, + RISCVI_FNMADD_S = 0x0000004f, + + RISCVI_FADD_S = 0x00000053, + RISCVI_FSUB_S = 0x08000053, + RISCVI_FMUL_S = 0x10000053, + RISCVI_FDIV_S = 0x18000053, + RISCVI_FSQRT_S = 0x58000053, + + RISCVI_FSGNJ_S = 0x20000053, + RISCVI_FSGNJN_S = 0x20001053, + RISCVI_FSGNJX_S = 0x20002053, + + RISCVI_FMIN_S = 0x28000053, + RISCVI_FMAX_S = 0x28001053, + + RISCVI_FCVT_W_S = 0xc0000053, + RISCVI_FCVT_WU_S = 0xc0100053, + + RISCVI_FMV_X_W = 0xe0000053, + + RISCVI_FEQ_S = 0xa0002053, + RISCVI_FLT_S = 0xa0001053, + RISCVI_FLE_S = 0xa0000053, + + RISCVI_FCLASS_S = 0xe0001053, + + RISCVI_FCVT_S_W = 0xd0000053, + RISCVI_FCVT_S_WU = 0xd0100053, + RISCVI_FMV_W_X = 0xf0000053, + + RISCVI_FMV_S = 0x20000053, + RISCVI_FNEG_S = 0x20001053, + RISCVI_FABS_S = 0x20002053, +#if LJ_TARGET_RISCV64 + RISCVI_FCVT_L_S = 0xc0200053, + RISCVI_FCVT_LU_S = 0xc0300053, + RISCVI_FCVT_S_L = 0xd0200053, + RISCVI_FCVT_S_LU = 0xd0300053, +#endif + + /* --- RVD --- */ + RISCVI_FLD = 0x00003007, + RISCVI_FSD = 0x00003027, + + RISCVI_FMADD_D = 0x02000043, + RISCVI_FMSUB_D = 0x02000047, + RISCVI_FNMSUB_D = 0x0200004b, + RISCVI_FNMADD_D = 0x0200004f, + + RISCVI_FADD_D = 0x02000053, + RISCVI_FSUB_D = 0x0a000053, + RISCVI_FMUL_D = 0x12000053, + RISCVI_FDIV_D = 0x1a000053, + RISCVI_FSQRT_D = 0x5a000053, + + RISCVI_FSGNJ_D = 0x22000053, + RISCVI_FSGNJN_D = 0x22001053, + RISCVI_FSGNJX_D = 0x22002053, + + RISCVI_FMIN_D = 0x2a000053, + RISCVI_FMAX_D = 0x2a001053, + + RISCVI_FCVT_S_D = 0x40100053, + RISCVI_FCVT_D_S = 0x42000053, + + RISCVI_FEQ_D = 0xa2002053, + RISCVI_FLT_D = 0xa2001053, + RISCVI_FLE_D = 0xa2000053, + + RISCVI_FCLASS_D = 0xe2001053, + + RISCVI_FCVT_W_D = 0xc2000053, + RISCVI_FCVT_WU_D = 0xc2100053, + RISCVI_FCVT_D_W = 0xd2000053, + RISCVI_FCVT_D_WU = 0xd2100053, + + RISCVI_FMV_D = 0x22000053, + RISCVI_FNEG_D = 0x22001053, + RISCVI_FABS_D = 0x22002053, +#if LJ_TARGET_RISCV64 + RISCVI_FCVT_L_D = 0xc2200053, + RISCVI_FCVT_LU_D = 0xc2300053, + RISCVI_FMV_X_D = 0xe2000053, + RISCVI_FCVT_D_L = 0xd2200053, + RISCVI_FCVT_D_LU = 0xd2300053, + RISCVI_FMV_D_X = 0xf2000053, +#endif + + /* --- Zifencei --- */ + RISCVI_FENCE = 0x0000000f, + RISCVI_FENCE_I = 0x0000100f, + + /* --- Zicsr --- */ + RISCVI_CSRRW = 0x00001073, + RISCVI_CSRRS = 0x00002073, + RISCVI_CSRRC = 0x00003073, + RISCVI_CSRRWI = 0x00005073, + RISCVI_CSRRSI = 0x00006073, + RISCVI_CSRRCI = 0x00007073, + + /* --- RVB --- */ + /* Zba */ + RISCVI_SH1ADD = 0x20002033, + RISCVI_SH2ADD = 0x20004033, + RISCVI_SH3ADD = 0x20006033, +#if LJ_TARGET_RISCV64 + RISCVI_ADD_UW = 0x0800003b, + + RISCVI_SH1ADD_UW = 0x2000203b, + RISCVI_SH2ADD_UW = 0x2000403b, + RISCVI_SH3ADD_UW = 0x2000603b, + + RISCVI_SLLI_UW = 0x0800101b, + + RISCVI_ZEXT_W = 0x0800003b, +#endif + /* Zbb */ + RISCVI_ANDN = 0x40007033, + RISCVI_ORN = 0x40006033, + RISCVI_XNOR = 0x40004033, + + RISCVI_CLZ = 0x60001013, + RISCVI_CTZ = 0x60101013, + + RISCVI_CPOP = 0x60201013, + + RISCVI_MAX = 0x0a006033, + RISCVI_MAXU = 0x0a007033, + RISCVI_MIN = 0x0a004033, + RISCVI_MINU = 0x0a005033, + + RISCVI_SEXT_B = 0x60401013, + RISCVI_SEXT_H = 0x60501013, +#if LJ_TARGET_RISCV64 + RISCVI_ZEXT_H = 0x0800403b, +#endif + + RISCVI_ROL = 0x60001033, + RISCVI_ROR = 0x60005033, + RISCVI_RORI = 0x60005013, + + RISCVI_ORC_B = 0x28705013, + +#if LJ_TARGET_RISCV64 + RISCVI_REV8 = 0x6b805013, + + RISCVI_CLZW = 0x6000101b, + RISCVI_CTZW = 0x6010101b, + + RISCVI_CPOPW = 0x6020101b, + + RISCVI_ROLW = 0x6000103b, + RISCVI_RORIW = 0x6000501b, + RISCVI_RORW = 0x6000503b, +#endif + /* NYI: Zbc, Zbs */ + + /* --- Zicond --- */ + RISCVI_CZERO_EQZ = 0x0e005033, + RISCVI_CZERO_NEZ = 0x0e007033, + + /* --- Zfa --- */ + RISCVI_FLI_S = 0xf0100053, + RISCVI_FMINM_S = 0x28002053, + RISCVI_FMAXM_S = 0x28003053, + RISCVI_FROUND_S = 0x40400053, + RISCVI_FROUNDNX_S = 0x40500053, + RISCVI_FCVTMOD_W_D = 0xc2801053, + RISCVI_FLEQ_S = 0xa0004053, + RISCVI_FLTQ_S = 0xa0005053, + RISCVI_FLI_D = 0xf2100053, + RISCVI_FMINM_D = 0x2a002053, + RISCVI_FMAXM_D = 0x2a003053, + RISCVI_FROUND_D = 0x42400053, + RISCVI_FROUNDNX_D = 0x42500053, + RISCVI_FLEQ_D = 0xa2004053, + RISCVI_FLTQ_D = 0xa2005053, + + RISCVI_FROUND_S_RTZ = 0x40401053, + RISCVI_FROUND_S_RDN = 0x40402053, + RISCVI_FROUND_S_RUP = 0x40403053, + RISCVI_FROUNDNX_S_RTZ = 0x40501053, + RISCVI_FROUNDNX_S_RDN = 0x40502053, + RISCVI_FROUNDNX_S_RUP = 0x40503053, + RISCVI_FROUND_D_RTZ = 0x42401053, + RISCVI_FROUND_D_RDN = 0x42402053, + RISCVI_FROUND_D_RUP = 0x42403053, + RISCVI_FROUNDNX_D_RTZ = 0x42501053, + RISCVI_FROUNDNX_D_RDN = 0x42502053, + RISCVI_FROUNDNX_D_RUP = 0x42503053, + + /* TBD: RVV?, RVP?, RVJ? */ + + /* --- XThead* --- */ + /* XTHeadBa */ + RISCVI_TH_ADDSL = 0x0000100b, + + /* XTHeadBb */ + RISCVI_TH_SRRI = 0x1000100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_SRRIW = 0x1400100b, +#endif + RISCVI_TH_EXT = 0x0000200b, + RISCVI_TH_EXTU = 0x0000300b, + RISCVI_TH_FF0 = 0x8400100b, + RISCVI_TH_FF1 = 0x8600100b, + RISCVI_TH_REV = 0x8200100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_REVW = 0x9000100b, +#endif + RISCVI_TH_TSTNBZ = 0x8000100b, + + /* XTHeadBs */ + RISCVI_TH_TST = 0x8800100b, + + /* XTHeadCondMov */ + RISCVI_TH_MVEQZ = 0x4000100b, + RISCVI_TH_MVNEZ = 0x4200100b, + + /* XTHeadMac */ + RISCVI_TH_MULA = 0x2000100b, + RISCVI_TH_MULAH = 0x2800100b, +#if LJ_TARGET_RISCV64 + RISCVI_TH_MULAW = 0x2400100b, +#endif + RISCVI_TH_MULS = 0x2200100b, + RISCVI_TH_MULSH = 0x2a00100b, + RISCVI_TH_MULSW = 0x2600100b, + + /* NYI: XTHeadMemIdx, XTHeadFMemIdx, XTHeadMemPair */ +} RISCVIns; + +typedef enum RISCVRM { + RISCVRM_RNE = 0, + RISCVRM_RTZ = 1, + RISCVRM_RDN = 2, + RISCVRM_RUP = 3, + RISCVRM_RMM = 4, + RISCVRM_DYN = 7, +} RISCVRM; + +static const uint16_t riscv_fli_map_hi16[32] = { + 0xbff0u, // -1 + 0x0010u, // min + 0x3ef0u, // 2^-16 + 0x3f00u, // 2^-15 + 0x3f70u, // 2^-8 + 0x3f80u, // 2^-7 + 0x3fb0u, // 2^-4 + 0x3fc0u, // 2^-3, 0.125 + 0x3fd0u, // 2^-2, 0.25 + 0x3fd4u, // 0.3125 + 0x3fd8u, // 0.375 + 0x3fdcu, // 0.4375 + 0x3fe0u, // 0.5 + 0x3fe4u, // 0.625 + 0x3fe8u, // 0.75 + 0x3fecu, // 0.875 + 0x3ff0u, // 1 + 0x3ff4u, // 1.25 + 0x3ff8u, // 1.5 + 0x3ffcu, // 1.75 + 0x4000u, // 2 + 0x4004u, // 2.5 + 0x4008u, // 3 + 0x4010u, // 4 + 0x4020u, // 8 + 0x4030u, // 16 + 0x4060u, // 128 + 0x4070u, // 256 + 0x40e0u, // 2^15, 32768 + 0x40f0u, // 2^16, 65536 + 0x7ff0u, // inf + 0x7ff8u, // canonical nan +}; + +#endif From ad11ee89948b74fce6f1534ec25648397bb78347 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:21:02 +0800 Subject: [PATCH 08/22] riscv(ffi): add call convention and support framework --- src/lj_ccall.c | 156 +++++++++++++++++++++++++++++++++- src/lj_ccall.h | 17 +++- src/lj_ccallback.c | 64 +++++++++++++- src/vm_riscv64.dasc | 202 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 434 insertions(+), 5 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index c613db2cc..e4bed4f84 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -687,6 +687,97 @@ if (ngpr < maxgpr) { dp = &cc->gpr[ngpr++]; goto done; } \ } +#elif LJ_TARGET_RISCV64 +/* -- RISC-V lp64d calling conventions ------------------------------------ */ + +#define CCALL_HANDLE_STRUCTRET \ + /* Return structs of size > 16 by reference. */ \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + unsigned int cl = ccall_classify_struct(cts, ctr); \ + if ((cl & 4) && (cl >> 8) <= 2) { \ + CTSize i = (cl >> 8) - 1; \ + do { ((float *)dp)[i] = cc->fpr[i].f; } while (i--); \ + } else { \ + if (cl > 1) { \ + sp = (uint8_t *)&cc->fpr[0]; \ + if ((cl >> 8) > 2) \ + sp = (uint8_t *)&cc->gpr[0]; \ + } \ + memcpy(dp, sp, ctr->size); \ + } \ + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass long double complex by reference. */ \ + if (sz == 2*sizeof(long double)) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass complex in two FPRs or on stack. */ \ + else if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + sz = 2*CTSIZE_PTR; \ + } else { \ + isfp = 1; \ + sz = 2*CTSIZE_PTR; \ + } + +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass structs of size >16 by reference. */ \ + unsigned int cl = ccall_classify_struct(cts, d); \ + nff = cl >> 8; \ + if (sz > 16) { \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } \ + /* Pass struct in FPRs. */ \ + if (cl > 1) { \ + isfp = (cl & 4) ? 2 : 1; \ + } + + +#define CCALL_HANDLE_REGARG \ + if (isfp && (!isva)) { /* Try to pass argument in FPRs. */ \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? n : 2; \ + if (nfpr + n2 <= CCALL_NARG_FPR && nff <= 2) { \ + dp = &cc->fpr[nfpr]; \ + nfpr += n2; \ + goto done; \ + } else { \ + if (ngpr + n2 <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n2; \ + goto done; \ + } \ + } \ + } else { /* Try to pass argument in GPRs. */ \ + if (ngpr + n <= maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -1047,6 +1138,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, #endif +/* -- RISC-V ABI struct classification ---------------------------- */ + +#if LJ_TARGET_RISCV64 + +static unsigned int ccall_classify_struct(CTState *cts, CType *ct) +{ + CTSize sz = ct->size; + unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + r |= sct->size; + if (!isu) n++; else if (n == 0) n = 1; + } else if (ctype_iscomplex(sct->info)) { + r |= (sct->size >> 1); + if (!isu) n += 2; else if (n < 2) n = 2; + } else if (ctype_isstruct(sct->info)) { + goto substruct; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info)) { + goto noth; + } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + sct = ctype_rawchild(cts, ct); + substruct: + if (sct->size > 0) { + unsigned int s = ccall_classify_struct(cts, sct); + if (s <= 1) goto noth; + r |= (s & 255); + if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + } + } + } + if ((r == 4 || r == 8) && n <= 4) + return r + (n << 8); +noth: /* Not a homogeneous float/double aggregate. */ + return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -1093,6 +1229,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif #endif +#if LJ_TARGET_RISCV64 + int nff = 0; +#endif + /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR @@ -1282,7 +1422,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int64_t *)dp = (int64_t)*(int32_t *)dp; } #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_RISCV64 + if (isfp && d->size == sizeof(float)) + ((uint32_t *)dp)[1] = 0xffffffffu; /* Float NaN boxing */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) @@ -1322,6 +1466,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } +#elif LJ_TARGET_RISCV64 + if (isfp == 2 && nff <= 2) { + /* Split complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { + ((uint64_t *)dp)[i] = 0xffffffff00000000ul | ((uint32_t *)dp)[i]; + } while (i--); + } #else UNUSED(isfp); #endif @@ -1331,7 +1483,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if ((int32_t)nsp < 0) nsp = 0; #endif -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. */ #endif cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); diff --git a/src/lj_ccall.h b/src/lj_ccall.h index 38d35dc52..609effa0b 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -157,6 +157,21 @@ typedef union FPRArg { float f; } FPRArg; +#elif LJ_TARGET_RISCV64 + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 8 +#define CCALL_NRET_GPR 2 +#define CCALL_NRET_FPR 2 +#define CCALL_SPS_EXTRA 3 +#define CCALL_SPS_FREE 1 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -204,7 +219,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ -#elif LJ_TARGET_PPC +#elif LJ_TARGET_PPC || LJ_TARGET_RISCV64 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index ae0934587..ef9c13ffc 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -91,6 +91,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #define CALLBACK_MCODE_HEAD 52 +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_MCODE_HEAD 68 + #else /* Missing support for this architecture. */ @@ -293,6 +297,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) } return p; } +#elif LJ_TARGET_RISCV64 +static void *callback_mcode_init(global_State *g, uint32_t *page) +{ + uint32_t *p = page; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; + uintptr_t target_hi = (target >> 32), target_lo = target & 0xffffffffULL; + uintptr_t ug_hi = (ug >> 32), ug_lo = ug & 0xffffffffULL; + MSize slot; + *p++ = RISCVI_LUI | RISCVF_D(RID_X6) | RISCVF_IMMU(RISCVF_HI(target_hi)); + *p++ = RISCVI_LUI | RISCVF_D(RID_X7) | RISCVF_IMMU(RISCVF_HI(ug_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(RISCVF_LO(target_hi)); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(RISCVF_LO(ug_hi)); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo >> 21); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo >> 21); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(11); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(11); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI((target_lo >> 10) & 0x7ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI((ug_lo >> 10) & 0x7ff); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_SHAMT(10); + *p++ = RISCVI_SLLI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_SHAMT(10); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X6) | RISCVF_S1(RID_X6) | RISCVF_IMMI(target_lo & 0x3ff); + *p++ = RISCVI_ADDI | RISCVF_D(RID_X7) | RISCVF_S1(RID_X7) | RISCVF_IMMI(ug_lo & 0x3ff); + *p++ = RISCVI_JALR | RISCVF_D(RID_X0) | RISCVF_S1(RID_X6) | RISCVF_IMMJ(0); + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = RISCVI_LUI | RISCVF_D(RID_X5) | RISCVF_IMMU(slot); + *p = RISCVI_JAL | RISCVF_IMMJ(((char *)page-(char *)p)); + p++; + } + return p; +} #else /* Missing support for this architecture. */ #define callback_mcode_init(g, p) (p) @@ -580,6 +617,31 @@ void lj_ccallback_mcode_free(CTState *cts) if (ngpr < maxgpr) { sp = &cts->cb.gpr[ngpr++]; goto done; } \ } +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr + n <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr]; \ + nfpr += n; \ + goto done; \ + } else if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } else { \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } \ + } + +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + #else #error "Missing calling convention definitions for this architecture" #endif @@ -735,7 +797,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int64_t *)dp = (int64_t)*(int32_t *)dp; } #endif -#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_RISCV64 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 87327c798..4c8189b54 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -811,14 +811,29 @@ static void build_subroutines(BuildCtx *ctx) | mv BASE, TMP2 // Restore caller BASE. | ld LFUNC:TMP1, FRAME_FUNC(TMP2) | ld PC, -24(RB) // Restore PC from [cont|PC]. + |.if FFI + | sltiu TMP3, TMP0, 2 + |.endif | cleartp LFUNC:TMP1 | add TMP2, RA, RD | ld TMP1, LFUNC:TMP1->pc | sd TISNIL, -8(TMP2) // Ensure one valid arg. + |.if FFI + | bnez TMP3, >1 + |.endif | // BASE = base, RA = resultptr, RB = meta base | ld KBASE, PC2PROTO(k)(TMP1) | jr TMP0 // Jump to continuation. | + |.if FFI + |1: + | addi TMP1, RB, -32 + | bxnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub RC, TMP1, BASE + | j ->vm_call_tail + |.endif + | |->cont_cat: // RA = resultptr, RB = meta base | lw INS, -4(PC) | addi CARG2, RB, -32 @@ -1018,6 +1033,18 @@ static void build_subroutines(BuildCtx *ctx) | // Returns 0/1 or TValue * (metamethod). | j <3 | + |->vmeta_equal_cd: + |.if FFI + | addi PC, PC, -4 + | mv CARG1, L + | mv CARG2, INS + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | j <3 + |.endif + | |->vmeta_istype: | addi PC, PC, -4 | sd BASE, L->base @@ -2161,7 +2188,6 @@ static void build_subroutines(BuildCtx *ctx) | | |->vm_callhook: // Dispatch target for call hooks. - | mv CARG2, PC | |->cont_stitch: // Trace stitching. | @@ -2218,6 +2244,133 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in x5, g in x7. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:x7->ctype_state + | mv GL, x7 + | addxi DISPATCH, x7, GG_G2DISP + | srli x5, x5, 12 + | sw x5, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | fsd FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | fsd FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | fsd FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | fsd FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | fsd FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | fsd FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | fsd FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | fsd FARG8, CTSTATE->cb.fpr[7] + | addi TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. + | mv CARG1, CTSTATE + | mv CARG2, sp + | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | mv L, CRET1 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | slli TMP3, TMP3, 32 + | li_vmstate INTERP + | subw RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | fmv.d.x TOBIT, TMP3 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ld CTSTATE, GL->ctype_state + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | mv CARG1, CTSTATE + | mv CARG2, RA + | // (CTState *cts, TValue *o) + | call_intern cont_ffi_callback, lj_ccallback_leave + | fld FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | fld FRET2, CTSTATE->cb.fpr[1] + | ld CRET2, CTSTATE->cb.gpr[1] + | j ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | lbu CARG3, CCSTATE->nfpr + | mv TMP2, sp + | sub sp, sp, TMP1 + | sd ra, -8(TMP2) + | sd x18, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | mv x18, TMP2 + | addi TMP1, CCSTATE, offsetof(CCallState, stack) + | mv TMP2, sp + | add TMP3, TMP1, CARG2 + | beqz CARG2, >2 + |1: + | ld TMP0, 0(TMP1) + | addi TMP1, TMP1, 8 + | sd TMP0, 0(TMP2) + | addi TMP2, TMP2, 8 + | bltu TMP1, TMP3, <1 + |2: + | beqz CARG3, >3 + | fld FARG1, CCSTATE->fpr[0] + | fld FARG2, CCSTATE->fpr[1] + | fld FARG3, CCSTATE->fpr[2] + | fld FARG4, CCSTATE->fpr[3] + | fld FARG5, CCSTATE->fpr[4] + | fld FARG6, CCSTATE->fpr[5] + | fld FARG7, CCSTATE->fpr[6] + | fld FARG8, CCSTATE->fpr[7] + |3: + | ld CFUNCADDR, CCSTATE->func + | ld CARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | jalr CFUNCADDR + | ld CCSTATE:TMP1, -24(x18) + | ld TMP0, -16(x18) + | ld ra, -8(x18) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + | fsd FRET1, CCSTATE:TMP1->fpr[0] + | fsd FRET2, CCSTATE:TMP1->fpr[1] + | mv sp, x18 + | mv x18, TMP0 + | ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- } /* Generate the code for a single instruction. */ @@ -2342,6 +2495,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz TMP0, ->BC_ISNEN_Z } |// Either or both types are not numbers. + |.if FFI + | // Check if RA or RD is a cdata. + | xori TMP0, CARG3, LJ_TCDATA + | xori TMP1, CARG4, LJ_TCDATA + | and TMP0, TMP0, TMP1 + | bxeqz TMP0, ->vmeta_equal_cd + |.endif | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | decode_BC4b TMP2 | addw TMP2, TMP2, TMP3 // (jump-0x8000)<<2 @@ -2394,10 +2554,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sub RD, KBASE, RD | lhu TMP2, -4+OFS_RD(PC) | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp CARG3, CARG1 + | li TMP1, LJ_TCDATA + |.endif | li TMP0, LJ_TSTR | decode_BC4b TMP2 | settp CARG2, TMP0 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 + |.if FFI + | bxeq CARG3, TMP1, ->vmeta_equal_cd + |.endif | xor TMP0, CARG1, CARG2 // TMP2=0: A==D; TMP2!=0: A!=D | addw TMP2, TMP2, TMP3 if (vk) { @@ -2452,7 +2619,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |4: // RA is not an integer. | addw TMP2, TMP2, TMP3 + |.if FFI + | bgeu CARG3, TISNUM, >7 + |.else | bgeu CARG3, TISNUM, <2 + |.endif | fmv.d.x FTMP0, CARG1 | fmv.d.x FTMP2, CARG2 | bne CARG4, TISNUM, >5 @@ -2465,11 +2636,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | j <1 | |6: // RA is an integer, RD is a number. + |.if FFI + | bgeu CARG4, TISNUM, >8 + |.else | bgeu CARG4, TISNUM, <2 + |.endif | fcvt.d.w FTMP0, CARG1 | fmv.d.x FTMP2, CARG2 | j <5 | + |.if FFI + |7: // RA not int, not number + | li TMP0, LJ_TCDATA + | bne CARG3, TMP0, <2 + | j ->vmeta_equal_cd + | + |8: // RD not int, not number + | li TMP0, LJ_TCDATA + | bne CARG4, TMP0, <2 + | j ->vmeta_equal_cd + |.endif break; case BC_ISEQP: case BC_ISNEP: @@ -2483,6 +2669,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | gettp TMP1, TMP1 | addi PC, PC, 4 | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D + |.if FFI + | li TMP3, LJ_TCDATA + | bxeq TMP1, TMP3, ->vmeta_equal_cd + |.endif | decode_BC4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 12)) & 0xfffff // -BCBIAS_J*4 | addw TMP2, TMP2, TMP3 // TMP2=(jump-0x8000)<<2 @@ -2823,6 +3013,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | sub TMP1, KBASE, RD + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | add RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next + |.endif break; case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8 From e36801cf0aae428801a4b6a2f69da8e6e5269e9a Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:27:26 +0800 Subject: [PATCH 09/22] riscv(support): add extension detection --- src/lib_jit.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lj_jit.h | 11 +++++ 2 files changed, 135 insertions(+) diff --git a/src/lib_jit.c b/src/lib_jit.c index d1f0213ae..ce972cd01 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -697,6 +697,111 @@ JIT_PARAMDEF(JIT_PARAMINIT) #include #endif +#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX +#include +#include +static sigjmp_buf sigbuf = {0}; +static void detect_sigill(int sig) +{ + siglongjmp(sigbuf, 1); +} + +static int riscv_compressed() +{ +#if defined(__riscv_c) || defined(__riscv_compressed) + /* Don't bother checking for RVC -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) + /* c.nop; c.nop; */ + __asm__(".4byte 0x00010001"); + return 1; +#else + return 0; +#endif +} + +static int riscv_zba() +{ +#if defined(__riscv_b) || defined(__riscv_zba) + /* Don't bother checking for Zba -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) + /* Don't bother verifying the result, just check if the instruction exists. */ + /* add.uw zero, zero, zero */ + __asm__(".4byte 0x0800003b"); + return 1; +#else + return 0; +#endif +} + +static int riscv_zbb() +{ +#if defined(__riscv_b) || defined(__riscv_zbb) + /* Don't bother checking for Zbb -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) + register int t asm ("a0"); + /* addi a0, zero, 255; sext.b a0, a0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); + return t < 0; +#else + return 0; +#endif +} + +static int riscv_zicond() +{ +#if defined(__riscv_zicond) + /* Don't bother checking for Zicond -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) + /* czero.eqz zero, zero, zero; */ + __asm__(".4byte 0x0e005033"); + return 1; +#else + return 0; +#endif +} + +static int riscv_zfa() +{ +#if defined(__riscv_zfa) + /* Don't bother checking for Zfa -- would crash before getting here. */ + return 1; +#else + return 0; +#endif +} + +static int riscv_xthead() +{ +#if (defined(__riscv_xtheadba) \ + && defined(__riscv_xtheadbb) \ + && defined(__riscv_xtheadcondmov) \ + && defined(__riscv_xtheadmac)) + /* Don't bother checking for XThead -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) + register int t asm ("a0"); + /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ + /* Therefore assume XThead* are present if XTheadBb is present. */ + /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); + return t == -1; /* In case of collision with other vendor extensions. */ +#else + return 0; +#endif +} + +static uint32_t riscv_probe(int (*func)(void), uint32_t flag) +{ + if (sigsetjmp(sigbuf, 1) == 0) { + return func() ? flag : 0; + } else return 0; +} +#endif + /* Arch-dependent CPU feature detection. */ static uint32_t jit_cpudetect(void) { @@ -769,6 +874,25 @@ static uint32_t jit_cpudetect(void) #endif #elif LJ_TARGET_S390X /* No optional CPU features to detect (for now). */ + +#elif LJ_TARGET_RISCV64 +#if LJ_HASJIT + /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */ + struct sigaction old = {0}, act = {0}; + act.sa_handler = detect_sigill; + sigaction(SIGILL, &act, &old); + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); + flags |= riscv_probe(riscv_zba, JIT_F_RVZba); + flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); + flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); + flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + sigaction(SIGILL, &old, NULL); + + /* Detect V/P? */ + /* V have no hardware available, P not ratified yet. */ +#endif + #else #error "Missing CPU detection for this architecture" #endif diff --git a/src/lj_jit.h b/src/lj_jit.h index a11ef7292..a0296f2e7 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -68,6 +68,17 @@ #endif #endif +#elif LJ_TARGET_RISCV64 + +#define JIT_F_RVC (JIT_F_CPU << 0) +#define JIT_F_RVZba (JIT_F_CPU << 1) +#define JIT_F_RVZbb (JIT_F_CPU << 2) +#define JIT_F_RVZicond (JIT_F_CPU << 3) +#define JIT_F_RVZfa (JIT_F_CPU << 4) +#define JIT_F_RVXThead (JIT_F_CPU << 5) + +#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" + #else #define JIT_F_CPUSTRING "" From 686ebf56843fb97b4f71a6943e290d9963a4bbdd Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:29:41 +0800 Subject: [PATCH 10/22] riscv(jit): add insn emitter --- src/lj_emit_riscv.h | 574 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 574 insertions(+) create mode 100644 src/lj_emit_riscv.h diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h new file mode 100644 index 000000000..d4160663e --- /dev/null +++ b/src/lj_emit_riscv.h @@ -0,0 +1,574 @@ +/* +** RISC-V instruction emitter. +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } +} + +#define get_kval(as, ref) get_k64val(as, ref) + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); +} + +#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) +#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) +#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) + +static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); +} + +#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) + +static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); +} + +#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) +#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) +#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) + +static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); +} + +#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) + +/* +static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); +} +*/ + +static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); +} + +#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) + +/* +static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); +} +*/ + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); +static Reg ra_scratch(ASMState *as, RegSet allow); + +static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) +{ + lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); + switch (riscvi) { + case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: + case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: + case RISCVI_FLW: case RISCVI_FLD: + emit_dsi(as, riscvi, data, base, ofs); + break; + case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: + case RISCVI_FSW: case RISCVI_FSD: + emit_s1s2i(as, riscvi, base, data, ofs); + break; + default: lj_assertA(0, "invalid lso"); break; + } +} + +static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, + int32_t shamt) +{ + if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { + if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { + case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; + case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; + default: lj_assertA(0, "invalid roti op"); break; + } + emit_dsshamt(as, riscvi, rd, rs1, shamt); + } else { + RISCVIns ai, bi; + int32_t shwid, shmsk; + switch (riscvi) { + case RISCVI_RORI: + ai = RISCVI_SRLI, bi = RISCVI_SLLI; + shwid = 64, shmsk = 63; + break; + case RISCVI_RORIW: + ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; + shwid = 32, shmsk = 31; + break; + default: + lj_assertA(0, "invalid roti op"); + return; + } + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); + emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); + } +} + +static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, riscvi, rd, rs1, rs2); + } else { + RISCVIns sai, sbi; + switch (riscvi) { + case RISCVI_ROL: + sai = RISCVI_SLL, sbi = RISCVI_SRL; + break; + case RISCVI_ROR: + sai = RISCVI_SRL, sbi = RISCVI_SLL; + break; + case RISCVI_ROLW: + sai = RISCVI_SLLW, sbi = RISCVI_SRLW; + break; + case RISCVI_RORW: + sai = RISCVI_SRLW, sbi = RISCVI_SLLW; + break; + default: + lj_assertA(0, "invalid rot op"); + return; + } + if (rd == rs2) { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } + } +} + +static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) +{ + if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || + (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { + emit_ds(as, riscvi, rd, rs1); + } else if (as->flags & JIT_F_RVXThead) { + uint32_t hi, sext; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + hi = 15, sext = 0; + break; + case RISCVI_ZEXT_W: + hi = 31, sext = 0; + break; + case RISCVI_SEXT_B: + hi = 7, sext = 1; + break; + case RISCVI_SEXT_H: + hi = 15, sext = 1; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU, + rd, rs1, hi << 6); + } else { + RISCVIns sli, sri; + int32_t shamt; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 48; + break; + case RISCVI_ZEXT_W: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 32; + break; + case RISCVI_SEXT_B: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 56; + break; + case RISCVI_SEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 48; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsshamt(as, sri, rd, rd, shamt); + emit_dsshamt(as, sli, rd, rs1, shamt); + } +} + +static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) +{ + if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); + } else { + emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); + emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); + } +} + +/* +static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +/* +static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); + } else { + emit_ds(as, RISCVI_NOT, rd, rd); + emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); + } +} + +static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) +{ + if (as->flags & JIT_F_RVZba) { + switch (shamt) { + case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break; + case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; + case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break; + default: return; + } + } else if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); + emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); + } +} + +#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) +#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) +#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) + +static void emit_loadk12(ASMState *as, Reg rd, int32_t i) +{ + emit_di(as, RISCVI_ADDI, rd, i); +} + +static void emit_loadk32(ASMState *as, Reg rd, int32_t i) +{ + if (checki12((int64_t)i)) { + emit_loadk12(as, rd, i); + } else { + if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0)) + emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i)); + else + emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i)); + emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); + } +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= REF_BASE) + + +/* Load a 32 bit constant into a GPR. */ +#define emit_loadi(as, r, i) emit_loadk32(as, r, i); + +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2)); + if (checki32((int64_t)u64)) { + emit_loadk32(as, r, (int32_t)u64); + } else if (checki32auipc(u64_delta)) { + emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta)); + emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta)); + } else { + uint32_t lo32 = u64 & 0xfffffffful; + if (checku11(lo32)) { + if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32); + emit_dsshamt(as, RISCVI_SLLI, r, r, 32); + } else { + RISCVIns li_insn[7] = {0}; + int shamt = 0, step = 0; + for(int bit = 0; bit < 32; bit++) { + if (lo32 & (1u << bit)) { + if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); + int inc = bit+10 > 31 ? 31-bit : 10; + bit += inc, shamt = inc+1; + uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1); + uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0); + li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload); + } else shamt++; + } + if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); + + if (step < 6) { + for(int i = 0; i < step; i++) + *--as->mcp = li_insn[i]; + } else { + emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 10); + emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 11); + emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 11); + } + } + + uint32_t hi32 = u64 >> 32; + if (hi32 & 0xfff) emit_loadk32(as, r, hi32); + else emit_du(as, RISCVI_LUI, r, hi32 >> 12); + } +} + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow) +{ + emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + if (as->flags & JIT_F_RVZfa) { + uint8_t sign = (*k >> 63) & 1; + uint16_t k_hi16 = (*k >> 48) & 0xffff; + uint64_t k_lo48 = *k & 0xffffffffffff; + uint16_t mk_hi16 = k_hi16 & 0x7fff; + if (!k_lo48) { + if (riscv_fli_map_hi16[0] == k_hi16) { + emit_ds(as, RISCVI_FLI_D, r, 0); + return; + } + for (int i = 1; i < 32; i++) { + if (riscv_fli_map_hi16[i] == mk_hi16) { + if (sign) + emit_ds1s2(as, RISCVI_FNEG_D, r, r, r); + emit_ds(as, RISCVI_FLI_D, r, i); + return; + } + } + } + } + r64 = RID_TMP; + emit_ds(as, RISCVI_FMV_D_X, r, r64); + } + emit_loadu64(as, r64, *k); +} + +/* Get/set global_State fields. */ +static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs) +{ + emit_lso(as, riscvi, r, RID_GL, ofs); +} + +#define emit_getgl(as, r, field) \ + emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field)) +#define emit_setgl(as, r, field) \ + emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field)) + +/* Trace number is determined from per-trace exit stubs. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (char *)target - (char *)(p - 1); + switch (jump) { + case -1: + lj_assertA(((delta + 0x10000) >> 13) == 0, "branch target out of range"); /* B */ + *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); + break; + case 0: case 1: + lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */ + if (checki13(delta) && !jump) { + *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); + *--p = RISCVI_NOP; + } else { + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */ + *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); + } + break; + default: + lj_assertA(0, "invalid jump type"); + break; + } + as->mcp = p; +} + +static void emit_jump(ASMState *as, MCode *target, int jump) +{ + MCode *p = as->mcp; + ptrdiff_t delta; + switch(jump) { + case -1: + delta = (char *)target - (char *)(p - 1); + lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */ + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); + break; + case 0: case 1: + delta = (char *)target - (char *)(p - 2); + lj_assertA(checki32auipc(delta), "jump target out of range"); /* AUIPC+JALR */ + if (checki21(delta) && !jump) { + *--p = RISCVI_NOP; + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); + } else { + *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + } + break; + default: + lj_assertA(0, "invalid jump type"); + break; + } + as->mcp = p; +} + +#define emit_jmp(as, target) emit_jump(as, target, 0) + +#define emit_mv(as, dst, src) \ + emit_ds(as, RISCVI_MV, (dst), (src)) + +static void emit_call(ASMState *as, void *target, int needcfa) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (char *)target - (char *)(p - 2); + if (checki21(delta)) { + *--p = RISCVI_NOP; + *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta); + } else if (checki32(delta)) { + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + needcfa = 1; + } else { + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0); + needcfa = 2; + } + as->mcp = p; + if (needcfa > 1) + ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (src < RID_MAX_GPR && dst < RID_MAX_GPR) + emit_mv(as, dst, src); + else if (src < RID_MAX_GPR) + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src); + else if (dst < RID_MAX_GPR) + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src); + else + emit_ds1s2(as, irt_isnum(ir->t) ? RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src); +} + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, + Reg tmp, intptr_t k) +{ + if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); + else { + switch (riscvi) { + case RISCVI_ADDI: riscvi = RISCVI_ADD; break; + case RISCVI_XORI: riscvi = RISCVI_XOR; break; + case RISCVI_ORI: riscvi = RISCVI_OR; break; + case RISCVI_ANDI: riscvi = RISCVI_AND; break; + default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; + } + emit_ds1s2(as, riscvi, dest, src, tmp); + emit_loadu64(as, tmp, (uintptr_t)k); + } +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r < RID_MAX_GPR) + emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs); + else + emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r < RID_MAX_GPR) + emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs); + else + emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); +} + + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) From 6cd6f3ffcfbd464ce978e4e1c8e402967a8a95fd Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:32:53 +0800 Subject: [PATCH 11/22] riscv(jit): add IR assembler --- src/lj_asm.c | 4 + src/lj_asm_riscv64.h | 2037 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2041 insertions(+) create mode 100644 src/lj_asm_riscv64.h diff --git a/src/lj_asm.c b/src/lj_asm.c index 33287af1e..06c813c50 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, RegSet rs) #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" +#elif LJ_TARGET_RISCV64 +#include "lj_emit_riscv.h" #else #error "Missing instruction emitter for target CPU" #endif @@ -1710,6 +1712,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_mips.h" #elif LJ_TARGET_S390X #include "lj_asm_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "lj_asm_riscv64.h" #else #error "Missing assembler for target CPU" #endif diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h new file mode 100644 index 000000000..2ee63fa10 --- /dev/null +++ b/src/lj_asm_riscv64.h @@ -0,0 +1,2037 @@ +/* +** RISC-V IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate a register or RID_ZERO. */ +static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) + return RID_ZERO; + r = ra_allocref(as, ref, allow); + } else { + ra_noweak(as, r); + } + return r; +} + +/* Allocate two source registers for three-operand instructions. */ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_alloc1z(as, ir->op2, allow); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_alloc1z(as, ir->op1, allow); + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + +/* -- Guard handling ------------------------------------------------------ */ + +/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */ +#define RISCV_SPAREJUMP 4 + +/* Setup spare long-range jump (trampoline?) slots per mcarea. */ + +static void asm_sparejump_setup(ASMState *as) +{ + MCode *mxp = as->mctop; + if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { + for (int i = RISCV_SPAREJUMP*2; i--; ) + *--mxp = RISCVI_EBREAK; + as->mctop = mxp; + } +} + +static MCode *asm_sparejump_use(MCode *mcarea, MCode *target) +{ + MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); + int slot = RISCV_SPAREJUMP; + RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr; + while (slot--) { + mxp -= 2; + ptrdiff_t delta = (char *)target - (char *)mxp; + tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)), + tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + if (mxp[0] == tauipc && mxp[1] == tjalr) { + return mxp; + } else if (mxp[0] == tslot) { + mxp[0] = tauipc, mxp[1] = tjalr; + return mxp; + } + } + return NULL; +} + +/* Setup exit stub after the end of each trace. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + MCode *mxp = as->mctop; + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) + asm_mclimit(as); + for (i = nexits-1; (int32_t)i >= 0; i--) + *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i))); + ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3); + /* 1: sw ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */ + *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno); + *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) + | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta)); + *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP) + | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta)); + *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP); + as->mctop = mxp; +} + +static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) +{ + /* Keep this in-sync with exitstub_trace_addr(). */ + return as->mctop + exitno + 4; +} + +/* Emit conditional branch to exit for guard. */ +static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + as->mcp = ++p; + *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); + riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */ + target = p - 1; /* Patch target later in asm_loop_fixup. */ + } + ptrdiff_t delta = (char *)target - (char *)(p - 1); + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); + *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); + as->mcp = p; +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki12(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (checki12(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; + intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); + if (checki12(ofs)) { + *ofsp = (int32_t)ofs; + return RID_GL; + } + } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)offsetof(global_State, tmptv); + return RID_GL; + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref, + RegSet allow, int32_t ofs) +{ + IRIns *ir = IR(ref); + Reg base; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + intptr_t ofs2; + if (ir->o == IR_ADD) { + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), + checki12(ofs2))) { + ref = ir->op1; + ofs = (int32_t)ofs2; + } + } else if (ir->o == IR_STRREF) { + ofs2 = 4096; + lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs2 = ofs + get_kval(as, ir->op2); + ref = ir->op1; + } else if (irref_isk(ir->op1)) { + ofs2 = ofs + get_kval(as, ir->op1); + ref = ir->op2; + } + if (!checki12(ofs2)) { + /* NYI: Fuse ADD with constant. */ + Reg right, left = ra_alloc2(as, ir, allow); + right = (left >> 8); left &= 255; + emit_lso(as, riscvi, rd, RID_TMP, ofs); + emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right); + return; + } + ofs = ofs2; + } + } + base = ra_alloc1(as, ref, allow); + emit_lso(as, riscvi, rd, base, ofs); +} + +/* Fuse Integer multiply-accumulate. */ + +static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_GPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_ds1s2(as, riscvi, dest, left, right); + if (dest != add) emit_mv(as, dest, add); + return 1; + } + return 0; +} + +/* Fuse FP multiply-add/sub. */ + +static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if ((as->flags & JIT_F_OPT_FMA) && + lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_ds1s2s3(as, riscvi, dest, left, right, add); + return 1; + } + return 0; +} +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func, 1); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + IRIns *ir = IR(ref); + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; if(ci->flags & CCI_VARARG) gpr++; + } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_FPR); + emit_spstore(as, ir, r, ofs); + ofs += 8; + } + } else { + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; if(ci->flags & CCI_VARARG) fpr++; + } else { + Reg r = ra_alloc1z(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs); + ofs += 8; + } + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir+1)->r)) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, + dest, RID_RET); + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else if (hiop) { + ra_destpair(as, ir); + } else { + ra_destreg(as, ir, RID_RET); + } + } +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(void *)get_kval(as, func); + } else { /* Need specific register for indirect calls. */ + Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); + MCode *p = as->mcp; + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r); + if (r == RID_CFUNCADDR) + *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); + else + *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); + as->mcp = p; + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, RISCVI_LD, RID_TMP, base, -8); +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp); + emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); + asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); + emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); + emit_ds(as, RISCVI_FCVT_D_W, tmp, dest); + emit_ds(as, RISCVI_FCVT_W_D, dest, left); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + emit_ds(as, RISCVI_FMV_X_W, dest, tmp); + emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + /* Use GPR to pass floating-point arguments */ + if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg ftmp = ra_scratch(as, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + ftmp, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); + emit_ds(as, riscvi, ftmp, left); + } + } else if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + dest, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, riscvi, dest, left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + RISCVIns riscvi = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) : + (irt_isi64(ir->t) ? RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) : + (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S)); + emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : + st == IRT_U8 ? RISCVI_ZEXT_B : + st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + emit_ext(as, riscvi, dest, left); + } else { /* 32/64 bit integer conversions. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_ext(as, RISCVI_SEXT_W, dest, left); + } else { /* 32 to 64 bit zero extension. */ + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } + } + } else { + if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = SPOFS_TMP; + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ + ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); + asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); + emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) // todo-new +{ + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + return; + } + emit_lso(as, RISCVI_FSD, ra_alloc1(as, ref, RSET_FPR), dest, 0); + } else { + asm_tvstore64(as, dest, 0, ref); + } + } + /* g->tmptv holds the TValue(s). */ + emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki12(ofs)) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dsi(as, RISCVI_ADDI, dest, base, ofs); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_sh3add(as, dest, base, idx, RID_TMP); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; + Reg cmp64 = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + if (!irt_ispri(kt)) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + int is_lend_exit = 0; + as->invmcp = NULL; + if (merge == IR_NE) + asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_mv(as, dest, tmp1); + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) { /* Must match asm_guard(). */ + l_end = asm_exitstub_addr(as, as->snapno); + is_lend_exit = 1; + } + if (irt_isnum(kt)) { + emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); + emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); + emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, -1); + emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); + emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); + } else { + emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); + } + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) + | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); + rset_clear(allow, type); + } + + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; + if (isk) + tmphash = ra_allock(as, khash, allow); + /* node = tab->node + (idx*32-idx*8) */ + emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); + lj_assertA(sizeof(Node) == 24, "bad Node size"); + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); + emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); + emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); + if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { + emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); + emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); + emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); + emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); + emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); + if (irt_isnum(kt)) { + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo + emit_ds(as, RISCVI_FMV_X_D, tmp1, key); + } else { + checkmclim(as); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo + emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !checki12(kofs); + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_dsi(as, RISCVI_ADDI, dest, node, ofs); + } + if (irt_ispri(irkey->t)) { + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); + if (bigofs) + emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); + } else { + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); + if (ir->o == IR_UREFC) + emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); + } else { + emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { + emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); + } else { + emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); + emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static RISCVIns asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return RISCVI_LB; + case IRT_U8: return RISCVI_LBU; + case IRT_I16: return RISCVI_LH; + case IRT_U16: return RISCVI_LHU; + case IRT_NUM: return RISCVI_FLD; + case IRT_FLOAT: return RISCVI_FLW; + default: return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW; + } +} + +static RISCVIns asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return RISCVI_SB; + case IRT_I16: case IRT_U16: return RISCVI_SH; + case IRT_NUM: return RISCVI_FSD; + case IRT_FLOAT: return RISCVI_FSW; + default: return irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, dest = ra_dest(as, ir, allow); + rset_clear(allow, dest); + RISCVIns riscvi = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, allow); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); + return; + } + } + ofs = field_ofs[ir->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + } + rset_clear(allow, idx); + emit_lso(as, riscvi, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); + emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); +} + +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } +} + +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; + IRType1 t = ir->t; + if (ra_used(ir)) { + lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + if (irt_isaddr(t)) { + emit_cleartp(as, dest, dest); + } else if (irt_isint(t)) + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + rset_clear(allow, idx); + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); + } else { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + if (ra_hasreg(dest)) { + if (irt_isnum(t)) { + emit_lso(as, RISCVI_FLD, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); + emit_lso(as, RISCVI_LD, dest, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_lso(as, RISCVI_FSD, src, idx, ofs); + } else { + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_lso(as, RISCVI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); + emit_ext(as, RISCVI_ZEXT_W, tmp, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, src, type); + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; + IRType1 t = ir->t; + int32_t ofs = 8*((int32_t)ir->op1-2); + lj_assertA(checki12(ofs), "sload IR operand out of range"); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); + if (irt_isaddr(t)) { /* Clear type from pointers. */ + emit_cleartp(as, dest, dest); + } else if (ir->op2 & IRSLOAD_CONVERT) { + if (irt_isint(t)) { + emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_ds(as, RISCVI_FMV_D_X, tmp, dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. */ + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); + emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 32); + } else { + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); + if (ra_hasreg(dest)) { + emit_lso(as, RISCVI_FLD, dest, base, ofs); + } + } else { + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); + } + emit_lso(as, RISCVI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? RISCVI_FLD : + irt_isint(t) ? RISCVI_LW : RISCVI_LD, + dest, base, ofs); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + if (ra_used(ir)) + ra_destreg(as, ir, RID_RET); /* GCcdata * */ + + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), + RID_RET, (sizeof(GCcdata))); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); + emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); + emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); + emit_loadk32(as, RID_TMP, id); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg link = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); // make tab gray again + emit_getgl(as, link, gc.grayagain); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: not jump + emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); // mark=0: gray + emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); + emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, -1); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, -1); // black: jump + emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, RISCVI_LBU, tmp, obj, + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); + emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, riscvi, dest, left, right); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + switch(riscvi) { + case RISCVI_FROUND_S_RTZ: case RISCVI_FROUND_S_RDN: case RISCVI_FROUND_S_RUP: + case RISCVI_FROUND_D_RTZ: case RISCVI_FROUND_D_RDN: case RISCVI_FROUND_D_RUP: + case RISCVI_FSQRT_S: case RISCVI_FSQRT_D: + emit_ds(as, riscvi, dest, left); + break; + case RISCVI_FMV_S: case RISCVI_FMV_D: + case RISCVI_FABS_S: case RISCVI_FABS_D: + case RISCVI_FNEG_S: case RISCVI_FNEG_D: + emit_ds1s2(as, riscvi, dest, left, left); + break; + default: + lj_assertA(0, "bad fp unary instruction"); + return; + } +} + +static void asm_fpround(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + MCLabel l_end = emit_label(as); + + if (dest != left) { + emit_ds1s2(as, RISCVI_FSGNJ_D, dest, dest, left); + emit_ds(as, RISCVI_FCVT_D_L, dest, RID_TMP); + } else { + Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); + emit_ds1s2(as, RISCVI_FSGNJ_D, dest, ftmp, left); + emit_ds(as, RISCVI_FCVT_D_L, ftmp, RID_TMP); + } + emit_ds(as, riscvi, RID_TMP, left); + emit_branch(as, RISCVI_BLT, RID_ZERO, RID_TMP, l_end, 0); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1075); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, 0x7ff); + emit_dsi(as, RISCVI_SRLI, RID_TMP, RID_TMP, 52); + if (dest != left) + emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); + emit_ds(as, RISCVI_FMV_X_D, RID_TMP, left); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm <= IRFPM_TRUNC) + if (as->flags & JIT_F_RVZfa) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FROUND_D_RDN : + fpm == IRFPM_CEIL ? RISCVI_FROUND_D_RUP : RISCVI_FROUND_D_RTZ); + } else { + asm_fpround(as, ir, fpm == IRFPM_FLOOR ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RDN) : + fpm == IRFPM_CEIL ? RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RUP) : + RISCVI_FCVT_L_D | RISCVF_RM(RISCVRM_RTZ)); + } + else if (fpm == IRFPM_SQRT) + asm_fpunary(as, ir, RISCVI_FSQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); +} + +static void asm_add(ASMState *as, IRIns *ir) +{ + IRType1 t = ir->t; + if (irt_isnum(t)) { + if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D)) + asm_fparith(as, ir, RISCVI_FADD_D); + return; + } else { + if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA)) + return; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checki12(k)) { + if (irt_is64(t)) { + emit_dsi(as, RISCVI_ADDI, dest, left, k); + } else { + emit_dsi(as, RISCVI_ADDIW, dest, left, k); + } + return; + } + } + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest, + left, right); + } +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D)) + asm_fparith(as, ir, RISCVI_FSUB_D); + return; + } else { + if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS)) + return; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, + left, right); + } +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, RISCVI_FMUL_D); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_MUL : RISCVI_MULW, dest, + left, right); + } +} + +static void asm_fpdiv(ASMState *as, IRIns *ir) +{ + asm_fparith(as, ir, RISCVI_FDIV_D); +} + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, RISCVI_FNEG_D); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest, + RID_ZERO, left); + } +} + +#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D) + +static void asm_arithov(ASMState *as, IRIns *ir) +{ + Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); + lj_assertA(!irt_is64(ir->t), "bad usage"); + if (irref_isk(ir->op2)) { + int k = IR(ir->op2)->i; + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); + if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ + left = ra_alloc1(as, ir->op1, RSET_GPR); + asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); + emit_dsi(as, RISCVI_ADDI, dest, left, k); + if (dest == left) emit_mv(as, RID_TMP, left); + return; + } + } + left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), + right), dest)); + asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO); + emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp); + if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ + emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right); + } else { /* ((dest^left) & (dest^~right)) < 0 */ + emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right); + } + emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left); + emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right); + if (dest == left || dest == right) + emit_mv(as, RID_TMP, dest == left ? left : right); +} + +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + +static void asm_mulov(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, RISCVI_BNE, dest, RID_TMP); + emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend + emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] +} + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + Reg left, right, dest = ra_dest(as, ir, RSET_GPR); + IRIns *irl = IR(ir->op1); + if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { + left = ra_alloc2(as, irl, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, RISCVI_XNOR, dest, left, right); + } else { + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ds(as, RISCVI_NOT, dest, left); + } +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); + if (as->flags & JIT_F_RVZbb) { + if (!irt_is64(ir->t)) + emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); + emit_ds(as, RISCVI_REV8, dest, left); + } else if (as->flags & JIT_F_RVXThead) { + emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW, + dest, left); + } else if (irt_is64(ir->t)) { + Reg tmp1, tmp2, tmp3, tmp4; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); + tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); + tmp4 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); + emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); + emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); + emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); + emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); + emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); + emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); + emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); + emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); + emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); + emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); + emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); + } else { + Reg tmp1, tmp2; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); + emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); + emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); + emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); + emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP); + emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); + emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); + emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8); + } +} + +static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left, right; + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checki12(k)) { + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dsi(as, riscvik, dest, left, k); + return; + } + } else if (as->flags & JIT_F_RVZbb) { + if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) { + left = ra_alloc1(as, irl->op1, RSET_GPR); + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvin, dest, right, left); + return; + } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) { + left = ra_alloc1(as, ir->op1, RSET_GPR); + right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvin, dest, left, right); + return; + } + } + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvi, dest, left, right); +} + +#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) +#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN) +#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR) + +static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; + if (irref_isk(ir->op2)) { /* Constant shifts. */ + uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk); + switch (riscvik) { + case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI: + case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW: + emit_dsshamt(as, riscvik, dest, left, shift); + break; + case RISCVI_ADDI: shift = (-shift) & shmsk; + case RISCVI_RORI: + emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift); + break; + case RISCVI_ADDIW: shift = (-shift) & shmsk; + case RISCVI_RORIW: + emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift); + break; + default: + lj_assertA(0, "bad shift instruction"); + return; + } + } else { + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + switch (riscvi) { + case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL: + case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW: + emit_ds1s2(as, riscvi, dest, left, right); + break; + case RISCVI_ROR: case RISCVI_ROL: + case RISCVI_RORW: case RISCVI_ROLW: + emit_rot(as, riscvi, dest, left, right, RID_TMP); + break; + default: + lj_assertA(0, "bad shift instruction"); + return; + } + } +} + +#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \ + asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW)) +#define asm_bshr(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \ + asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW)) +#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \ + asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW)) +#define asm_brol(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \ + asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW)) + // ROLI -> ADDI, ROLIW -> ADDIW; Hacky but works. +#define asm_bror(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \ + asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW)) + +static void asm_min_max(ASMState *as, IRIns *ir, int ismax) +{ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + MCLabel l_ret_left, l_end; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + l_end = emit_label(as); + + if (dest != left) + emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); + l_ret_left = emit_label(as); + + if (dest != left) + emit_jump(as, l_end, -1); + if (dest != right) + emit_ds1s2(as, RISCVI_FMV_D, dest, right, right); + + emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, -1); + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, + ismax ? left : right); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right); + } else { + if (as->flags & JIT_F_RVXThead) { + if (left == right) { + if (dest != left) emit_mv(as, dest, left); + } else { + if (dest == left) { + emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP); + } else { + emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP); + if (dest != right) emit_mv(as, dest, right); + } + } + } else if (as->flags & JIT_F_RVZicond) { + emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); + if (dest != right) { + emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP); + emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP); + } else { + emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP); + emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP); + } + } else { + if (dest != right) { + emit_ds1s2(as, RISCVI_XOR, dest, right, dest); + emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); + emit_ds1s2(as, RISCVI_XOR, dest, right, left); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); + } else { + emit_ds1s2(as, RISCVI_XOR, dest, left, dest); + emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); + emit_ds1s2(as, RISCVI_XOR, dest, left, right); + emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); + } + } + emit_ds1s2(as, RISCVI_SLT, RID_TMP, + ismax ? left : right, ismax ? right : left); + } + } +} + +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + +/* -- Comparisons --------------------------------------------------------- */ + +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + IROp op = ir->o; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) + ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); + switch (op) { + case IR_LT: case IR_UGE: + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right); + break; + case IR_LE: case IR_UGT: case IR_ABC: + emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right); + break; + case IR_GT: case IR_ULE: + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); + break; + case IR_GE: case IR_ULT: + emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left); + break; + case IR_EQ: case IR_NE: + emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right); + break; + default: + break; + } +} + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ + /* 00 01 10 11 100 101 110 111 */ + IROp op = ir->o; + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + switch (op) { + case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; + case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; + case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; + case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; + default: break; + } + return; + } + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if ((op&2)) k++; + if (checki12(k)) { + asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), + (op&2) ? right : left, (op&2) ? left : right); +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +static void asm_equal(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpcomp(as, ir); + } else { + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); + } +} + +/* -- Split register ops -------------------------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { + case IR_CALLN: + case IR_CALLL: + case IR_CALLS: + case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); + emit_lsglptr(as, RISCVI_LBU, RID_TMP, + (int32_t)offsetof(global_State, hookmask)); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ + Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; + ExitNo oldsnap = as->snapno; + rset_clear(allow, pbase); + as->snapno = exitno; + asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); + as->snapno = oldsnap; + if (allow) { + tmp = rset_pickbot(allow); + ra_modified(as, tmp); + } else { // allow == RSET_EMPTY + tmp = RID_RET; + emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. */ + } + emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); + emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase); + emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack)); + if (pbase == RID_TMP) + emit_getgl(as, RID_TMP, jit_base); + emit_getgl(as, tmp, cur_L); + if (allow == RSET_EMPTY) /* Spill temp register. */ + emit_lso(as, RISCVI_SD, tmp, RID_SP, 0); +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; +#ifdef LUA_USE_ASSERT + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs); + } else { + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + int64_t kki = (int64_t)LJ_KEYINDEX << 32; + if (irref_isk(ref)) { + emit_lso(as, RISCVI_SD, + ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), + RID_BASE, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); + emit_lso(as, RISCVI_SD, RID_TMP, RID_BASE, ofs); + emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, rki); + } + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + } + checkmclim(as); + } + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Marker to prevent patching the GC check exit. */ +#define RISCV_NOPATCH_GC_CHECK \ + (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP)) + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */ + *--as->mcp = RISCV_NOPATCH_GC_CHECK; + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + tmp = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, -1); + emit_getgl(as, tmp, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + ptrdiff_t delta; + if (as->loopinv) { /* Inverted loop branch? */ + delta = (char *)target - (char *)(p - 2); + /* asm_guard* already inverted the branch, and patched the final b. */ + lj_assertA(checki21(delta), "branch target out of range"); + p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); + } else { + /* J */ + delta = (char *)target - (char *)(p - 1); + p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); + } +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do(?) */ +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_mv(as, r, RID_BASE); + } +} + +/* Coalesce BASE register for a side trace. */ +static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } + } + return RID_NONE; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + int32_t spadj = as->T->spadjust; + if (spadj == 0) { + p[-3] = RISCVI_NOP; + // as->mctop = p-2; + } else { + /* Patch stack adjustment. */ + p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); + } + /* Patch exit jump. */ + ptrdiff_t delta = (char *)target - (char *)(p - 2); + p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 2; /* Leave room for exitstub. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) { + nfpr--; if(ci->flags & CCI_VARARG) ngpr--; + } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; + else nslots += 2; + } else { + if (ngpr > 0) { + ngpr--; if(ci->flags & CCI_VARARG) nfpr--; + } else nslots += 2; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + asm_sparejump_setup(as); + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *px = exitstub_trace_addr(T, exitno); + MCode *cstart = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); + + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ + ptrdiff_t odelta = (char *)px - (char *)(p+1), + ndelta = (char *)target - (char *)(p+1); + if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && + ((p[0] & 0x0000007fu) == 0x63u) && + ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && + ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) || + (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && + ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) { + lj_assertJ(checki32(ndelta), "branch target out of range"); + /* Patch jump, if within range. */ + patchbranch: + if (checki21(ndelta)) { /* Patch jump */ + p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta); + if (!cstart) cstart = p + 1; + } else { /* Branch out of range. Use spare jump slot in mcarea. */ + MCode *mcjump = asm_sparejump_use(mcarea, target); + if (mcjump) { + lj_mcode_sync(mcjump, mcjump+2); + ndelta = (char *)mcjump - (char *)(p+1); + if (checki21(ndelta)) { + goto patchbranch; + } else { + lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); + } + } + /* Ignore jump slot overflow. Child trace is simply not attached. */ + } + } else if (p+2 == pe) { + if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) { + ptrdiff_t delta = (char *)target - (char *)p; + lj_assertJ(checki32(delta), "jump target out of range"); + p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + if (!cstart) cstart = p; + } + } + } + if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); +} From d8c992d888137be24076a392860f963f678e96c3 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:36:58 +0800 Subject: [PATCH 12/22] riscv(interp): add VM builder support --- src/host/buildvm.c | 2 ++ src/host/buildvm_asm.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/host/buildvm.c b/src/host/buildvm.c index d460b3144..ff4e01e11 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -69,6 +69,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #include "../dynasm/dasm_mips.h" #elif LJ_TARGET_S390X #include "../dynasm/dasm_s390x.h" +#elif LJ_TARGET_RISCV64 +#include "../dynasm/dasm_riscv.h" #else #error "No support for this architecture (yet)" #endif diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index c0b0594c8..c8d609dd2 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -208,6 +208,34 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); exit(1); +#elif LJ_TARGET_RISCV64 + if ((ins & 0x7f) == 0x17u) { + fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); + } else if ((ins & 0x7f) == 0x67u) { + fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); + } else if ((ins & 0x7f) == 0x6fu) { + fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); + } else if ((ins & 0x7f) == 0x03u) { + uint8_t funct3 = (ins >> 12) & 7; + uint8_t rd = (ins >> 7) & 31, rs1 = (ins >> 15) & 31; + switch (funct3) { + case 0: fprintf(ctx->fp, "\tlb"); break; + case 1: fprintf(ctx->fp, "\tlh"); break; + case 2: fprintf(ctx->fp, "\tlw"); break; + case 3: fprintf(ctx->fp, "\tld"); break; + case 4: fprintf(ctx->fp, "\tlbu"); break; + case 5: fprintf(ctx->fp, "\tlhu"); break; + case 6: fprintf(ctx->fp, "\tlwu"); break; + default: goto rv_reloc_err; + } + fprintf(ctx->fp, " x%d, %s(x%d)\n", rd, sym, rs1); + } else { +rv_reloc_err: + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); + } #else #error "missing relocation support for this architecture" #endif @@ -303,6 +331,9 @@ void emit_asm(BuildCtx *ctx) #endif #if LJ_TARGET_MIPS fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); +#endif +#if LJ_TARGET_RISCV64 + fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); #endif emit_asm_align(ctx, 4); From 7a0691dc94278cc6eb05d66542fb3d22bb0369dc Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:38:50 +0800 Subject: [PATCH 13/22] riscv(misc): add bytecode listing support --- src/jit/bcsave.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index f0ca5514b..7caea74a1 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -103,6 +103,7 @@ local map_arch = { mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, s390x = { e = "be", b = 64, m = 22, }, + riscv64 = { e = "le", b = 64, m = 243, f = 0x00000004, }, } local map_os = { From 6f1f257bffdf6f66eaa2f2c6a5f9c38d02fc23f6 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:40:51 +0800 Subject: [PATCH 14/22] riscv(jit): add hooks in interpreter --- src/vm_riscv64.dasc | 388 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 4c8189b54..db91430fa 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -448,6 +448,24 @@ |.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro |.macro st_vmstate; sw TMP0, GL->vmstate; .endmacro | +|.macro hotcheck, delta, target +| srli TMP1, PC, 1 +| andi TMP1, TMP1, 126 +| add TMP1, TMP1, DISPATCH +| lhu TMP2, GG_DISP2HOT(TMP1) +| addiw TMP2, TMP2, -delta +| sh TMP2, GG_DISP2HOT(TMP1) +| bxltz TMP2, target +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| |// Move table write barrier back. Overwrites mark and tmp. |.macro barrierback, tab, mark, tmp, target | ld tmp, GL->gc.grayagain @@ -1145,8 +1163,15 @@ static void build_subroutines(BuildCtx *ctx) | sd PC, SAVE_PC(sp) | mv MULTRES, INS | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + |.if JIT + | decode_OP1 TMP0, MULTRES + | li TMP1, BC_JFORI + |.endif | decode_RA8 RA, MULTRES | decode_RD8 RD, MULTRES + |.if JIT + | bxeq TMP0, TMP1, =>BC_JFORI + |.endif | j =>BC_FORI | |//----------------------------------------------------------------------- @@ -2141,6 +2166,20 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |->vm_record: // Dispatch target for recording phase. + |.if JIT + | lbu TMP3, GL->hookmask + | andi TMP1, TMP3, HOOK_VMEVENT // No recording while in vmevent. + | bnez TMP1, >5 + | // Decrement the hookcount for consistency, but always do the call. + | lw TMP2, GL->hookcount + | andi TMP1, TMP3, HOOK_ACTIVE + | bnez TMP1, >1 + | addiw TMP2, TMP2, -1 + | andi TMP1, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz TMP1, >1 + | sw TMP2, GL->hookcount + | j >1 + |.endif | |->vm_rethook: // Dispatch target for return hooks. | lbu TMP3, GL->hookmask @@ -2186,10 +2225,103 @@ static void build_subroutines(BuildCtx *ctx) | lw MULTRES, -24(RB) // Restore MULTRES for *M ins. | j <4 | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | addi CARG1, GL, GG_G2J + | cleartp LFUNC:TMP1 + | sd PC, SAVE_PC(sp) + | ld TMP1, LFUNC:TMP1->pc + | mv CARG2, PC + | sd L, (offsetof(jit_State, L))(CARG1) + | lbu TMP1, PC2PROTO(framesize)(TMP1) + | sd BASE, L->base + | slli TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP1, L->top + | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc) + | j <3 + |.endif + | | |->vm_callhook: // Dispatch target for call hooks. + | mv CARG2, PC + |.if JIT + | j >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | ori CARG2, PC, 1 + |1: + |.endif + | add TMP0, BASE, RC + | sd PC, SAVE_PC(sp) + | sd BASE, L->base + | sub RA, RA, BASE + | sd TMP0, L->top + | mv CARG1, L + | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top + | sd x0, SAVE_PC(sp) // Invalidate for subsequent line hook. + | add RA, BASE, RA + | sub NARGS8:RC, TMP0, BASE + | ld LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | lw INS, -4(PC) + | jr CRET1 | |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | ld TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA8 RC, INS + | addi TMP1, MULTRES, -8 + | cleartp TRACE:TMP2 + | add RC, BASE, RC // Call base. + | beqz TMP1, >2 + |1: // Move results down. + | ld CARG1, 0(RA) + | addi TMP1, TMP1, -8 + | addi RA, RA, 8 + | sd CARG1, 0(RC) + | addi RC, RC, 8 + | bnez TMP1, <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + | add RA, BASE, RA + |3: + | bltu RC, RA, >8 // More results wanted? + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | bxeq RD, TMP3, ->cont_nop // Blacklisted. + | slliw RD, RD, 3 + | bxnez RD, =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | addi CARG1, GL, GG_G2J + | // addi CARG2, CARG1, 1 // We don't care what's on the verge. + | addi CARG2, CARG1, 2047 // jit_State too large. + | sw TMP3, (offsetof(jit_State, exitno)-2047)(CARG2) + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sd BASE, L->base + | mv CARG2, PC + | // (jit_State *J, const BCIns *pc) + | call_intern cont_stitch, lj_dispatch_stitch + | ld BASE, L->base + | j ->cont_nop + | + |8: + | sd TISNIL, 0(RC) + | addi RC, RC, 8 + | j <3 + |.endif | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE @@ -2204,6 +2336,149 @@ static void build_subroutines(BuildCtx *ctx) | ld BASE, L->base | j ->cont_nop #endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + | fsd f..a, a*8(sp) + | fsd f..b, b*8(sp) + | sd x..a, 32*8+a*8(sp) + | sd x..b, 32*8+b*8(sp) + |.endmacro + | + |->vm_exit_handler: + |.if JIT + | addi sp, sp, -(32*8+32*8) + | savex_ 0, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 26, 27 + | savex_ 28, 29 + | savex_ 30, 31 + | fsd f1, 1*8(sp) + | fsd f2, 2*8(sp) + | fsd f3, 3*8(sp) + | fsd f4, 4*8(sp) + | sd x0, 32*8+1*8(sp) // Clear RID_TMP. + | ld TMP1, 32*8+32*8(sp) // Load exit pc. + | addi TMP2, sp, 32*8+32*8 // Recompute original value of sp. + | addxi DISPATCH, GL, GG_G2DISP + | sd TMP2, 32*8+2*8(sp) // Store sp in RID_SP + | addi CARG1, GL, GG_G2J + | li_vmstate EXIT + | // addi CARG2, CARG1, 1 // We don't care what's on the verge. + | addi CARG2, CARG1, 2047 // jit_State too large. + | sub TMP1, TMP1, ra + | lw TMP2, 0(ra) // Load trace number. + | st_vmstate + | srli TMP1, TMP1, 2 + | ld L, GL->cur_L + | ld BASE, GL->jit_base + | srli TMP2, TMP2, 12 + | addi TMP1, TMP1, -2 + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sw TMP2, (offsetof(jit_State, parent)-2047)(CARG2) // Store trace number. + | sd BASE, L->base + | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number. + | sd x0, GL->jit_base + | mv CARG2, sp + | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | ld BASE, L->base + | andi sp, TMP1, CFRAME_RAWMASK + | ld PC, SAVE_PC(sp) // Get SAVE_PC. + | sd L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). + | j >1 + |.endif + | + |->vm_exit_interp: + |.if JIT + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ld L, SAVE_L(sp) + | addxi DISPATCH, GL, GG_G2DISP + | sd BASE, L->base + |1: + | ld LFUNC:RB, FRAME_FUNC(BASE) + | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. + | beqz TMP0, >9 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | slli MULTRES, CRET1, 3 + | cleartp LFUNC:RB + | sw MULTRES, TMPD(sp) + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | slli TMP3, TMP3, 32 + | ld TMP1, LFUNC:RB->pc + | sd x0, GL->jit_base + | ld KBASE, PC2PROTO(k)(TMP1) + | fmv.d.x TOBIT, TMP3 + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) + | addi PC, PC, 4 + | addiw CRET1, CRET1, 17 // Static dispatch? + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, GL->vmstate + | decode_RD8a RD, INS + | beqz CRET1, >5 + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | sltiu TMP2, TMP1, BC_FUNCF*8 + | ld TMP3, 0(TMP0) + | decode_RA8 RA, INS + | beqz TMP2, >2 + | decode_RD8b RD + | jr TMP3 + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? + | ld TMP1, FRAME_PC(BASE) + | bnez TMP2, >3 + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8 TMP0, TMP2 + | sub TMP1, BASE, TMP0 + | ld LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ld TMP1, LFUNC:TMP2->pc + | ld KBASE, PC2PROTO(k)(TMP1) + |3: + | addi RC, MULTRES, -8 + | add RA, RA, BASE + | jr TMP3 + | + |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. + | ld TMP0, GL_J(trace)(GL) + | decode_RD8b RD + | add TMP0, TMP0, RD + | ld TRACE:TMP2, 0(TMP0) + | lw INS, TRACE:TMP2->startins + | decode_OP8 TMP1, INS + | add TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld TMP3, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | decode_RA8b RA + | jr TMP3 + | + |9: // Rethrow error from the right C frame. + | negw CARG2, CRET1 + | mv CARG1, L + | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode) + |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- @@ -2230,6 +2505,10 @@ static void build_subroutines(BuildCtx *ctx) | vm_round rdn |->vm_ceil: | vm_round rup + |->vm_trunc: + |.if JIT + | vm_round rtz + |.endif | | |//----------------------------------------------------------------------- @@ -2243,6 +2522,67 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endif | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP2, TMP2 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | lw NEXT_ASIZE, NEXT_TAB->asize + | ld NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | bgeu NEXT_IDX, NEXT_ASIZE, >5 + | slliw NEXT_TMP1, NEXT_IDX, 3 + | add NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | li TMP3, LJ_TISNUM + | ld NEXT_TMP2, 0(NEXT_TMP1) + | slli TMP3, TMP3, 47 + | or NEXT_TMP1, NEXT_IDX, TMP3 + | addiw NEXT_IDX, NEXT_IDX, 1 + | beq NEXT_TMP2, NEXT_NIL, <1 + | sd NEXT_TMP2, NEXT_RES_VAL + | sd NEXT_TMP1, NEXT_RES_KEY + | mv NEXT_RES_VK, NEXT_RES_PTR + | mv NEXT_RES_IDX, NEXT_IDX + | ret + | + |5: // Traverse hash part. + | subw NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | lw NEXT_TMP0, NEXT_TAB->hmask + | ld NODE:NEXT_RES_VK, NEXT_TAB->node + | slliw NEXT_TMP2, NEXT_RES_IDX, 5 + | slliw TMP3, NEXT_RES_IDX, 3 + | subw TMP3, NEXT_TMP2, TMP3 + | add NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, TMP3 + |6: + | bltu NEXT_TMP0, NEXT_RES_IDX, >8 + | ld NEXT_TMP2, NODE:NEXT_RES_VK->val + | addiw NEXT_RES_IDX, NEXT_RES_IDX, 1 + | bne NEXT_TMP2, NEXT_NIL, >9 + | // Skip holes in hash part. + | addi NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | j <6 + | + |8: // End of iteration. Set the key to nil (not the value). + | sd NEXT_NIL, NEXT_RES_KEY + | mv NEXT_RES_VK, NEXT_RES_PTR + |9: + | addw NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + | ret + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -3733,6 +4073,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + |.if JIT + | hotloop + |.endif |->vm_IITERN: | add RA, BASE, RA | ld TAB:RB, -16(RA) @@ -3817,8 +4160,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) | add PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li TMP3, BC_ITERN + | lhu TMP2, OFS_RD(PC) + | bne TMP0, TMP3, >6 + |.endif | sb TMP1, OFS_OP(PC) | j <1 + |.if JIT + |6: // Unpatch JLOOP. + | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. + | slliw TMP2, TMP2, 3 + | add TMP0, TMP0, TMP2 + | ld TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | andi TMP0, TMP0, -256 + | or TMP0, TMP0, TMP1 + | sw TMP0, 0(PC) + | j <1 + |.endif break; case BC_VARG: @@ -3984,6 +4345,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Loops and branches ------------------------------------------------ */ case BC_FORL: + |.if JIT + | hotloop + |.endif | // Fall through. Assumes BC_IFORL follows. break; @@ -4104,6 +4468,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERL: + |.if JIT + | hotloop + |.endif | // Fall through. Assumes BC_IITERL follows. break; @@ -4128,6 +4495,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_LOOP: + | // RA = base*8, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif | // Fall through. Assumes BC_ILOOP follows. break; @@ -4137,6 +4510,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_JLOOP: + |.if JIT + | // RA = base*8 (ignored), RD = traceno*8 + | ld TMP0, GL_J(trace)(GL) // Assumes J.trace in-reach relative to GL. + | add TMP0, TMP0, RD + | // Traces on RISC-V don't store the trace number, so use 0. + | sd x0, GL->vmstate + | ld TRACE:TMP1, 0(TMP0) + | sd BASE, GL->jit_base // store Current JIT code L->base + | ld TMP1, TRACE:TMP1->mcode + | sd L, GL->tmpbuf.L + | jr TMP1 + |.endif break; case BC_JMP: @@ -4148,6 +4533,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Function headers -------------------------------------------------- */ case BC_FUNCF: + |.if JIT + | hotcall + |.endif case BC_FUNCV: /* NYI: compiled vararg functions. */ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. break; From ab2db4bdaabbb72943aaa4e0cbeaad7f0a66939e Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:42:05 +0800 Subject: [PATCH 15/22] riscv(interp): add DWARF info --- src/vm_riscv64.dasc | 132 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index db91430fa..09ac0cf05 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -4679,5 +4679,135 @@ static int build_backend(BuildCtx *ctx) /* Emit pseudo frame-info for all assembler functions. */ static void emit_asm_debug(BuildCtx *ctx) { - + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.4byte .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.4byte 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 1\n" /* Return address is in ra. */ + "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ + "\t.align 3\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.4byte .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.4byte .Lframe0\n" + "\t.8byte .Lbegin\n" + "\t.8byte %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, + fcofs, CFRAME_SIZE); + for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); + fprintf(ctx->fp, + "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ + "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); + for (i = 27; i >= 18; i--) /* offset f31-f18 */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); + fprintf(ctx->fp, + "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ + "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ + "\t.align 3\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.4byte .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.4byte .Lframe0\n" + "\t.4byte lj_vm_ffi_call\n" + "\t.4byte %d\n" + "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ + "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ + "\t.byte 0xd\n\t.uleb128 0x12\n" + "\t.align 3\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe1:\n" + "\t.4byte .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.4byte 0\n" + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 1\n" /* Return address is in ra. */ + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" + "\t.4byte lj_err_unwind_dwarf-.\n" + "\t.byte 0x1b\n" + "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ + "\t.align 2\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, + ".LSFDE2:\n" + "\t.4byte .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.4byte .LASFDE2-.Lframe1\n" + "\t.4byte .Lbegin-.\n" + "\t.4byte %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ + fcofs, CFRAME_SIZE); + for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); + fprintf(ctx->fp, + "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ + "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); + for (i = 27; i >= 18; i--) /* offset f31-f18 */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); + fprintf(ctx->fp, + "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ + "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ + "\t.align 2\n" + ".LEFDE2:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".Lframe2:\n" + "\t.4byte .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.4byte 0\n" + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 1\n" /* Return address is in ra. */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" + "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ + "\t.align 2\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, + ".LSFDE3:\n" + "\t.4byte .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.4byte .LASFDE3- .Lframe2\n" + "\t.4byte lj_vm_ffi_call-.\n" + "\t.4byte %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ + "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ + "\t.byte 0xd\n\t.uleb128 0x12\n" + "\t.align 2\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif +#endif + break; + default: + break; + } } From 2e8e5ab7162dca64c0180998a290dc1e02b51942 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:42:34 +0800 Subject: [PATCH 16/22] riscv(jit): add GDBJIT support --- src/lj_gdbjit.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 56094cf10..f1a208bd4 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -306,6 +306,9 @@ enum { #elif LJ_TARGET_MIPS DW_REG_SP = 29, DW_REG_RA = 31, +#elif LJ_TARGET_RISCV64 + DW_REG_SP = 2, + DW_REG_RA = 1, #else #error "Unsupported target architecture" #endif @@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { .machine = 20, #elif LJ_TARGET_MIPS .machine = 8, +#elif LJ_TARGET_RISCV64 + .machine = 243, #else #error "Unsupported target architecture" #endif @@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } } +#elif LJ_TARGET_RISCV64 + { + int i; + for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } + DB(DW_CFA_offset|9); DUV(17); + DB(DW_CFA_offset|8); DUV(18); + for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } + DB(DW_CFA_offset|32|9); DUV(29); + DB(DW_CFA_offset|32|8); DUV(30); + } #else #error "Unsupported target architecture" #endif From 012b47ef5c2839ec5ffa764aad0a3fd5ce900dae Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:43:46 +0800 Subject: [PATCH 17/22] riscv(support,linux): add Linux specfic icache sync codepath --- src/lj_mcode.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 0f87caf75..44ef1d48f 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -38,6 +38,12 @@ void sys_icache_invalidate(void *start, size_t len); #endif +#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX +#include +#include +#include +#endif + /* Synchronize data/instruction cache. */ void lj_mcode_sync(void *start, void *end) { @@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *end) sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); +#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX +#if (defined(__GNUC__) || defined(__clang__)) + __asm__ volatile("fence rw, rw"); +#else + lj_vm_fence_rw_rw(); +#endif +#ifdef __GLIBC__ + __riscv_flush_icache(start, end, 0); +#else + syscall(__NR_riscv_flush_icache, start, end, 0UL); +#endif #elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else From 2b0c38c54f587b57e9da5a8680c236ef6b02fe58 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:47:58 +0800 Subject: [PATCH 18/22] riscv(support,linux): make mremap() non-moving due to VA space woes --- src/lj_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_alloc.c b/src/lj_alloc.c index cb704f7b3..9039d8053 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) #define CALL_MREMAP_NOMOVE 0 #define CALL_MREMAP_MAYMOVE 1 -#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) +#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE #else #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE From e425d91fa6274d06aee2090dd9323417adbc7bf3 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:48:43 +0800 Subject: [PATCH 19/22] riscv(misc): add disassmbler support --- src/jit/dis_riscv.lua | 979 ++++++++++++++++++++++++++++++++++++++++ src/jit/dis_riscv64.lua | 16 + 2 files changed, 995 insertions(+) create mode 100644 src/jit/dis_riscv.lua create mode 100644 src/jit/dis_riscv64.lua diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua new file mode 100644 index 000000000..8de563a72 --- /dev/null +++ b/src/jit/dis_riscv.lua @@ -0,0 +1,979 @@ +------------------------------------------------------------------------------ +-- LuaJIT RISC-V disassembler module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Milos Poletanovic from Syrmia.com. +-- Contributed by gns from PLCT Lab, ISCAS. +------------------------------------------------------------------------------ +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles most standard RISC-V instructions. +-- Mode is little-endian +------------------------------------------------------------------------------ + +local type = type +local byte, format = string.byte, string.format +local match, gmatch = string.match, string.gmatch +local concat = table.concat +local bit = require("bit") +local band, bor, tohex = bit.band, bit.bor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift +local jit = require("jit") + +local jstat = { jit.status() } +local function is_opt_enabled(opt) + for _, v in ipairs(jstat) do + if v == opt then + return true + end + end + return false +end +local xthead = is_opt_enabled("XThead") + +------------------------------------------------------------------------------ +-- Opcode maps +------------------------------------------------------------------------------ + +--RVC32 extension + +local map_quad0 = { + shift = 13, mask = 7, + [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", + false, "c.fsdNMh", "c.swZMn", "c.fswNMn" +} + +local map_sub2quad1 = { + shift = 5, mask = 3, + [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" +} + +local map_sub1quad1 = { + shift = 10, mask = 3, + [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1 +} + +local map_quad1 = { + shift = 13, mask = 7, + [0] = { + shift = 7, mask = 31, + [0] = "c.nop", _ = "c.addiDx" + }, + [1] = "c.jalT", [2] = "c.liDx", + [3] = { + shift = 7, mask = 31, + [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX", + _ = "c.luiDK" + }, + [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq" +} + +local map_sub1quad2 = { + shift = 12, mask = 1, + [0] = { + shift = 2, mask = 31, + [0] = "c.jrD", _ = "c.mvDE" + }, + [1] = { + shift = 2, mask = 31, + [0] = { + shift = 7, mask = 31, + [0] = "c.ebreak", _ = "c.jalrD" + }, + _ = "c.addDE" + } +} + +local map_quad2 = { + shift = 13, mask = 7, + [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY", + [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu" +} + +local map_compr = { + [0] = map_quad0, map_quad1, map_quad2 +} + +--RV32M +local map_mext = { + shift = 12, mask = 7, + [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr", + "divDRr", "divuDRr", "remDRr", "remuDRr" +} + +--RV64M +local map_mext64 = { + shift = 12, mask = 7, + [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr", + [7] = "remuwDRr" +} + +--RV32F, RV64F, RV32D, RV64D +local map_fload = { + shift = 12, mask = 7, + [2] = "flwFL", [3] = "fldFL" +} + +local map_fstore = { + shift = 12, mask = 7, + [2] = "fswSg", [3] = "fsdSg" +} + +local map_fmadd = { + shift = 25, mask = 3, + [0] = "fmadd.sFGgHo", "fmadd.dFGgHo" +} + +local map_fmsub = { + shift = 25, mask = 3, + [0] = "fmsub.sFGgHo", "fmsub.dFGgHo" +} + +local map_fnmsub = { + shift = 25, mask = 3, + [0] = "fnmsub.sFGgHo", "fnmsub.dFGgHo" +} + +local map_fnmadd = { + shift = 25, mask = 3, + [0] = "fnmadd.sFGgHo", "fnmadd.dFGgHo" +} + +local map_fsgnjs = { + shift = 12, mask = 7, + [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6" +} + +local map_fsgnjd = { + shift = 12, mask = 7, + [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6" +} + +local map_fms = { + shift = 12, mask = 7, + [0] = "fmin.sFGg", "fmax.sFGg", "fminm.sFGg", "fmaxm.sFGg" +} + +local map_fmd = { + shift = 12, mask = 7, + [0] = "fmin.dFGg", "fmax.dFGg", "fminm.dFGg", "fmaxm.dFGg" +} + +local map_fcomps = { + shift = 12, mask = 7, + [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg", + [4] = "fleq.sDGg", "fltq.sDGg" +} + +local map_fcompd = { + shift = 12, mask = 7, + [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg", + [4] = "fleq.dDGg", "fltq.dDGg" +} + +local map_fcvtwls = { + shift = 20, mask = 31, + [0] = "fcvt.w.sDGo", "fcvt.wu.sDGo", "fcvt.l.sDGo", "fcvt.lu.sDGo" +} + +local map_fcvtwld = { + shift = 20, mask = 31, + [0] = "fcvt.w.dDGo", "fcvt.wu.dDGo", "fcvt.l.dDGo", "fcvt.lu.dDGo", + [8] = { + shift = 12, mask = 7, + [1] = "fcvtmodw.dDG" + } +} + +local map_fcvts = { + shift = 20, mask = 31, + [0] = "fcvt.s.wFRo", "fcvt.s.wuFRo", "fcvt.s.lFRo", "fcvt.s.luFRo" +} + +local map_fcvtd = { + shift = 20, mask = 31, + [0] = "fcvt.d.wFRo", "fcvt.d.wuFRo", "fcvt.d.lFRo", "fcvt.d.luFRo" +} + +local map_fcvtsd = { + shift = 20, mask = 31, + [0] = "fcvt.s.dFGo", + [4] = "fround.sFGo", [5] = "froundnx.sFGo" +} + +local map_fcvtds = { + shift = 20, mask = 31, + [0] = "fcvt.d.sFGo", + [4] = "fround.dFGo", [5] = "froundnx.dFGo" +} + +local map_fmvwx = { + shift = 20, mask = 31, + [0] = "fmv.w.xFR", [1] = "fli.sFy" +} + +local map_fmvdx = { + shift = 20, mask = 31, + [0] = "fmv.d.xFR", [1] = "fli.dFy" +} + +local map_fext = { + shift = 25, mask = 127, + [0] = "fadd.sFGgo", [1] = "fadd.dFGgo", [4] = "fsub.sFGgo", [5] = "fsub.dFGgo", + [8] = "fmul.sFGgo", [9] = "fmul.dFGgo", [12] = "fdiv.sFGgo", [13] = "fdiv.dFGgo", + [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd, + [32] = map_fcvtsd, [33] = map_fcvtds,[44] = "fsqrt.sFGo", [45] = "fsqrt.dFGo", + [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld, + [104] = map_fcvts, [105] = map_fcvtd, + [112] = { + shift = 12, mask = 7, + [0] = "fmv.x.wDG", "fclass.sDG" + }, + [113] = { + shift = 12, mask = 7, + [0] = "fmv.x.dDG", "fclass.dDG" + }, + [120] = map_fmvwx, [121] = map_fmvdx +} + +--RV32A, RV64A +local map_aext = { + shift = 27, mask = 31, + [0] = { + shift = 12, mask = 7, + [2] = "amoadd.wDrO", [3] = "amoadd.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "amoswap.wDrO", [3] = "amoswap.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "lr.wDO", [3] = "lr.dDO" + }, + { + shift = 12, mask = 7, + [2] = "sc.wDrO", [3] = "sc.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "amoxor.wDrO", [3] = "amoxor.dDrO" + }, + [8] = { + shift = 12, mask = 7, + [2] = "amoor.wDrO", [3] = "amoor.dDrO" + }, + [12] = { + shift = 12, mask = 7, + [2] = "amoand.wDrO", [3] = "amoand.dDrO" + }, + [16] = { + shift = 12, mask = 7, + [2] = "amomin.wDrO", [3] = "amomin.dDrO" + }, + [20] = { + shift = 12, mask = 7, + [2] = "amomax.wDrO", [3] = "amomax.dDrO" + }, + [24] = { + shift = 12, mask = 7, + [2] = "amominu.wDrO", [3] = "amominu.dDrO" + }, + [28] = { + shift = 12, mask = 7, + [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO" + }, +} + +-- RV32I, RV64I +local map_load = { + shift = 12, mask = 7, + [0] = "lbDL", "lhDL", "lwDL", "ldDL", + "lbuDL", "lhuDL", "lwuDL" +} + +local map_opimm = { + shift = 12, mask = 7, + [0] = { + shift = 7, mask = 0x1ffffff, + [0] = "nop", _ = "addi|li|mvDR0I2" + }, + { + shift = 25, mask = 127, + [48] = { + shift = 20, mask = 31, + [4] = "sext.bDR", [5] = "sext.hDR" + }, + _ = "slliDRi", + }, "sltiDRI", "sltiu|seqzDRI5", + "xori|notDRI4", + { + shift = 26, mask = 63, + [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi", + [26] = { + shift = 20, mask = 63, + [56] = "rev8DR" + } + }, + "oriDRI", "andiDRI" +} + +local map_branch = { + shift = 12, mask = 7, + [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false, + "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", "bltuRrB", "bgeuRrB" +} + +local map_store = { + shift = 12, mask = 7, + [0] = "sbSr", "shSr", "swSr", "sdSr" +} + +local map_op = { + shift = 25, mask = 127, + [0] = { + shift = 12, mask = 7, + [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r", + "xorDRr", "srlDRr", "orDRr", "andDRr" + }, + [1] = map_mext, + [4] = { + + }, + [5] = { -- Zbb + shift = 12, mask = 7, + [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr" + }, + [7] = { -- Zicond + shift = 12, mask = 7, + [5] = "czero.eqzDRr", [7] = "czero.nezDRr" + }, + [16] = { -- Zba + shift = 12, mask = 7, + [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr" + }, + [32] = { -- Zbb + shift = 12, mask = 7, + [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr" + }, + [48] = { -- Zbb + shift = 12, mask = 7, + [1] = "rolDRr", [5] = "rorDRr" + } +} + +--- 64I +local map_opimm32 = { + shift = 12, mask = 7, + [0] = "addiw|sext.wDRI0", "slliwDRi", + [2] = { -- Zba + shift = 25, mask = 127, + [1] = "slli.uwDRi" + }, + [5] = { -- 64I + shift = 25, mask = 127, + [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi" + }, + [48] = { -- Zbb + shift = 25, mask = 127, + [5] = "roriwDRi" + } +} + +local map_op32 = { + shift = 25, mask = 127, + [0] = { -- 64I + shift = 12, mask = 7, + [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr" + }, + [1] = map_mext64, + [4] = { -- Zba & Zbb + shift = 12, mask = 7, + [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr" + }, + [16] = { -- Zba + shift = 12, mask = 7, + [2] = "sh1add.uw", [4] = "sh2add.uw", [6] = "sh3add.uw" + }, + [32] = { -- 64I + shift = 12, mask = 7, + [0] = "subw|negwDR0r", [5] = "srawDRr" + }, + [48] = { -- Zbb + shift = 12, mask = 7, + [1] = "rolwDRr", [5] = "rorwDRr" + } +} + +local map_ecabre = { + shift = 12, mask = 7, + [0] = { + shift = 20, mask = 4095, + [0] = "ecall", "ebreak" + } +} + +local map_fence = { + shift = 12, mask = 1, + [0] = "fence", --"fence.i" ZIFENCEI EXTENSION +} + +local map_jalr = { + shift = 7, mask = 0x1ffffff, + _ = "jalr|jrDRI7", [256] = "ret" +} + +local map_xthead_custom0 = { + shift = 12, mask = 7, + [1] = { -- Arithmetic + shift = 27, mask = 31, + [0] = "th.addslDRrv", + [2] = { + shift = 26, mask = 63, + [4] = "th.srriDRi", + [5] = { + shift = 25, mask = 127, + [10] = "th.srriwDRi" + } + }, + [4] = { -- XTheadMac + shift = 25, mask = 3, + [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr" + }, + [5] = { -- XTheadMac + shift = 25, mask = 3, + [0] = "th.mulahDRr", "th.mulshDRr" + }, + [8] = { -- XTheadCondMov + shift = 25, mask = 3, + [0] = "th.mveqzDRr", "th.mvnezDRr" + }, + [16] = { -- XTheadBb + shift = 20, mask = 31, + [0] = { + shift = 25, mask = 3, + [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR" + } + }, + [17] = { -- XTheadBb + shift = 26, mask = 1, + [0] = "th.tstDRi" + }, + [18] = { -- XTheadBb + shift = 20, mask = 31, + [0] = { + shift = 25, mask = 3, + [0] = "th.revwDR" + } + } + }, + [2] = "th.extDRji", [3] = "th.extuDRji", + { -- MemLoad + shift = 29, mask = 7, + [7] = { -- XTheadMemPair + shift = 25, mask = 3, + [0] = "th.lwdDrP", [2] = "th.lwudDrP", "th.lddDrP" + } + }, + { -- MemStore + shift = 29, mask = 7, + [7] = { -- XTheadMemPair + shift = 25, mask = 3, + [0] = "th.swdDrP", [3] = "th.sddDrP" + } + } +} + +local map_custom0 = xthead and map_xthead_custom0 or nil + +local map_pri = { + [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm, + [23] = "auipcDA", [27] = map_opimm32, + [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_op, + [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub, + [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, + [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre +} + +------------------------------------------------------------------------------ + +local map_gpr = { + [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", +} + +local map_fgpr = { + [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", +} + +local map_rm = { + [0] = "rne", "rtz", "rdn", "rup", "rmm", [7] = "dyn" +} + +local map_fli = { + [0] = "-1.0", + "min", + "0x1p-16", "0x1p-15", "0x1p-8", "0x1p-7", + "0.0625", "0.125", + "0.25", "0.3125", "0.375", "0.4375", + "0.5", "0.625", "0.75", "0.875", + "1.0", "1.25", "1.5", "1.75", + "2.0", "2.5", "3.0", + "4.0", "8.0", "16.0", "128.0", "256.0", + "32768.0", "65536.0", "inf", "nan" +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then extra = "\t->"..sym end + end + if ctx.hexdump > 0 then + ctx.out:write((format("%08x %s %-7s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))) + else + ctx.out(format("%08x %-7s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + local pos = ctx.pos + local first_byte = byte(ctx.code, ctx.pos+1) + --Examine if the next instruction is 16-bits or 32-bits + if(band(first_byte, 3) < 3) then + ctx.pos = pos + 2 + else + ctx.pos = pos + 4 + end +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function get_le(ctx) + local pos = ctx.pos + --Examine if the next instruction is 16-bits or 32-bits + local first_byte = byte(ctx.code, pos+1) + if(band(first_byte, 3) < 3) then --checking first two bits of opcode + local b0, b1 = byte(ctx.code, pos+1, pos+2) + return bor(lshift(b1, 8), b0) + else + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) + end +end + +local function parse_W(opcode) + local part1 = band(rshift(opcode, 7), 15) --9:6 + local part2 = band(rshift(opcode, 11), 3) --5:4 + local part3 = band(rshift(opcode, 5), 1)--3 + local part4 = band(rshift(opcode, 6), 1)--2 + return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4), + lshift(part3, 3), lshift(part4, 2)) +end + +local function parse_x(opcode) + local part1 = band(rshift(opcode, 12), 1) --5 + local part2 = band(rshift(opcode, 2), 31) --4:0 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2) + else + return bor(lshift(0, 31), lshift(part1, 5), part2) + end +end + +local function parse_X(opcode) + local part1 = band(rshift(opcode, 12), 1) --12 + local part2 = band(rshift(opcode, 3), 3) --8:7 + local part3 = band(rshift(opcode, 5), 1) --6 + local part4 = band(rshift(opcode, 2), 1) --5 + local part5 = band(rshift(opcode, 6), 1) --4 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7), + lshift(part3, 6), lshift(part4, 5), lshift(part5, 4)) + else + return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6), + lshift(part4, 5), lshift(part5, 4)) + end +end + +local function parse_S(opcode) + local part1 = band(rshift(opcode, 25), 127) --11:5 + local sign = band(rshift(part1, 6), 1) + local part2 = band(rshift(opcode, 7), 31) --4:0 + if (sign == 1) then + return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 5), part2) + else + return bor(lshift(0, 31), lshift(part1, 5), part2) + end +end + +local function parse_B(opcode) + local part1 = band(rshift(opcode, 7), 1) --11 + local part2 = band(rshift(opcode, 25), 63) --10:5 + local part3 = band(rshift(opcode, 8), 15) -- 4 : 1 + if (part1 == 1) then + return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), + lshift(part2, 5), lshift(part3, 1), 0) + else + return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5), + lshift(part3, 1), 0) + end +end + +local function parse_q(opcode) + local part1 = band(rshift(opcode, 12), 1) --8 + local part2 = band(rshift(opcode, 5), 3) --7:6 + local part3 = band(rshift(opcode, 2), 1) --5 + local part4 = band(rshift(opcode, 10), 3) --4:3 + local part5 = band(rshift(opcode, 3), 3) --2:1 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6), + lshift(part3, 5), lshift(part4, 3), lshift(part5, 1)) + else + return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5), + lshift(part4, 3), lshift(part5, 1)) + end +end + +local function parse_J(opcode) + local part1 = band(rshift(opcode, 31), 1) --20 + local part2 = band(rshift(opcode, 12), 255) -- 19:12 + local part3 = band(rshift(opcode, 20), 1) --11 + local part4 = band(rshift(opcode, 21), 1023) --10:1 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12), + lshift(part3, 11), lshift(part4, 1)) + else + return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12), + lshift(part3, 11), lshift(part4, 1)) + end +end + +local function parse_T(opcode) + local part1 = band(rshift(opcode, 12), 1) --11 + local part2 = band(rshift(opcode, 8), 1) --10 + local part3 = band(rshift(opcode, 9), 3)--9:8 + local part4 = band(rshift(opcode, 6), 1) --7 + local part5 = band(rshift(opcode, 7), 1) -- 6 + local part6 = band(rshift(opcode, 2), 1) --5 + local part7 = band(rshift(opcode, 11), 1) --4 + local part8 = band(rshift(opcode, 3), 7) --3:1 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11), + lshift(part2, 10), lshift(part3, 8), lshift(part4, 7), + lshift(part5, 6), lshift(part6, 5), lshift(part7, 4), + lshift(part8, 1)) + else + return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10), + lshift(part3, 8), lshift(part4, 7), lshift(part5, 6), + lshift(part6, 5), lshift(part7, 4), lshift(part8, 1)) + end +end + +local function parse_K(opcode) + local part1 = band(rshift(opcode, 12), 1) --5 17 + local part2 = band(rshift(opcode, 2), 31) --4:0 16:12 + if(part1 == 1) then + return bor(lshift(0, 31), lshift(0x7fff, 5), part2) + else + return bor(lshift(0, 31), lshift(part1, 5), part2) + end +end + +-- Disassemble a single instruction. +local function disass_ins(ctx) + local op = ctx:get() + local operands = {} + local last = nil + ctx.op = op + ctx.rel =nil + + local opat = 0 + --for compressed instructions + if(band(op, 3) < 3) then + opat = ctx.map_compr[band(op, 3)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end + local test = band(rshift(op, opat.shift), opat.mask) + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end + else + opat = ctx.map_pri[band(op,127)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end + end + local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") + local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") + local a1, a2 = 0 + if altname then + pat = pat2 + end + + local alias_done = false --variable for the case of 2 pseudoinstructions, if both parameters are x0, 0 + + for p in gmatch(pat, ".") do + local x = nil + if p == "D" then + x = map_gpr[band(rshift(op, 7), 31)] + elseif p == "F" then + x = map_fgpr[band(rshift(op, 7), 31)] + elseif p == "R" then + x = map_gpr[band(rshift(op, 15), 31)] + elseif p == "G" then + x = map_fgpr[band(rshift(op, 15), 31)] + elseif p == "r" then + x = map_gpr[band(rshift(op, 20), 31)] + if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then + local temp = last --because of the diffrent order of the characters + operands[#operands] = x + x = temp + end + elseif p == "g" then + x = map_fgpr[band(rshift(op, 20), 31)] + if(name == "fsw" or name == "fsd") then + local temp = last + operands[#operands] = x + x = temp + end + elseif p == "Z" then + x = map_gpr[8 + band(rshift(op, 2), 7)] + elseif p == "N" then + x = map_fgpr[8 + band(rshift(op, 2), 7)] + elseif p == "M" then + x = map_gpr[8 + band(rshift(op, 7), 7)] + elseif p == "E" then + x = map_gpr[band(rshift(op, 2), 31)] + elseif p == "W" then + local uimm = parse_W(op) + x = format("%s,%d", "sp", uimm) + elseif p == "x" then + x = parse_x(op) + elseif p == "h" then + local part1 = band(rshift(op, 5), 3) --7:6 + local part2 = band(rshift(op, 10), 7) --5:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 3)) + operands[#operands] = format("%d(%s)", uimm, last) + elseif p == "X" then + local imm = parse_X(op) + x = format("%s,%d", "sp", imm) + elseif p == "O" then + x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) + elseif p == "H" then + x = map_fgpr[band(rshift(op, 27), 31)] + elseif p == "L" then + local register = map_gpr[band(rshift(op, 15), 31)] + local disp = arshift(op, 20) + x = format("%d(%s)", disp, register) + elseif p == "P" then -- XTheadMemPair + local register = map_gpr[band(rshift(op, 15), 31)] + local disp = band(arshift(op, 25), 3) + local isword = bxor(band(arshift(op, 26), 1), 1) + x = format("(%s), %d, %d", register, disp, isword and 3 or 4) + elseif p == "I" then + x = arshift(op, 20) + --different for jalr + if(name == "jalr") then + local reg = map_gpr[band(rshift(op, 15), 31)] + if(ctx.reltab[reg] == nil) then + operands[#operands] = format("%d(%s)", x, last) + else + local target = ctx.reltab[reg] + x + operands[#operands] = format("%d(%s) #0x%08x", x, last, target) + ctx.rel = target + ctx.reltab[reg] = nil --assume no reuses of the register + end + x = nil --not to add additional operand + end + elseif p == "i" then + --both for RV32I AND RV64I + local value = band(arshift(op, 20), 63) + x = string.format("%d", value) + elseif p == "j" then -- XThead imm1[31..26] + local value = band(rshift(op, 26), 63) + x = string.format("%d", value) + elseif p == "v" then --XThead imm[2][26..25] + local value = band(rshift(op, 25), 3) + x = string.format("%d", value) + elseif p == "S" then + local register = map_gpr[band(rshift(op, 15), 31)] --register + local imm = parse_S(op) + x = format("%d(%s)", imm, register) + elseif p == "n" then + local part1 = band(rshift(op, 5), 1) --6 + local part2 = band(rshift(op, 10), 7) --5:3 + local part3 = band(rshift(op, 6), 1) --2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), + lshift(part3, 2)) + operands[#operands] = format("%d(%s)", uimm, last) + elseif p == "A" then + local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] + ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) + x = format("0x%x", value) + elseif p == "B" then + x = ctx.addr + ctx.pos + parse_B(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "U" then + local value = band(rshift(op, 12), 0xfffff) + x = string.format("0x%x", value) + elseif p == "Q" then + local part1 = band(rshift(op, 2), 7) --8:6 + local part2 = band(rshift(op, 12), 1) --5 + local part3 = band(rshift(op, 5), 3) --4:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 3)) + x = format("%d(%s)", uimm, "sp") + elseif p == "q" then + x = ctx.addr + ctx.pos + parse_q(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "J" then + x = ctx.addr + ctx.pos + parse_J(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "K" then + local value = parse_K(op) + x = string.format("0x%x", value) + elseif p == "Y" then + local part1 = band(rshift(op, 2), 3) --7:6 + local part2 = band(rshift(op, 12), 1) --5 + local part3 = band(rshift(op, 4), 7) --4:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 2)) + x = format("%d(%s)", uimm, "sp") + elseif p == "o" then -- rounding mode + x = map_rm[band(rshift(op, 12), 7)] + elseif p == "y" then -- fli lut + x = map_fli[band(rshift(op, 15), 31)] + elseif p == "1" then + local part1 = band(rshift(op, 12), 1) --5 + local part2 = band(rshift(op, 2), 31) --4:0 + local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) + x = string.format("0x%x", uimm) + elseif p == "T" then + x = ctx.addr + ctx.pos + parse_T(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "t" then + local part1 = band(rshift(op, 7), 7) --8:6 + local part2 = band(rshift(op, 10), 7) --5:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) + x = format("%d(%s)", uimm, "sp") + elseif p == "u" then + local part1 = band(rshift(op, 7), 3) --7:6 + local part2 = band(rshift(op, 9), 15) --5:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) + x = format("%d(%s)", uimm, "sp") + elseif p == "V" then + x = map_fgpr[band(rshift(op, 2), 31)] + elseif p == "0" then --PSEUDOINSTRUCTIONS + if (last == "zero" or last == 0) then + local n = #operands + operands[n] = nil + last = operands[n-1] + local a1, a2 = match(altname, "([^|]*)|(.*)") + if a1 then name, altname = a1, a2 + else name = altname end + alias_done = true + end + elseif (p == "4") then + if(last == -1) then + name = altname + operands[#operands] = nil + end + elseif (p == "5") then + if(last == 1) then + name = altname + operands[#operands] = nil + end + elseif (p == "6") then + if(last == operands[#operands - 1]) then + name = altname + operands[#operands] = nil + end + elseif (p == "7") then --jalr rs + local value = string.sub(operands[#operands], 1, 1) + local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) + if(value == "0" and + (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then + if(operands[#operands - 1] == "zero") then + name = altname + end + operands[#operands] = nil + operands[#operands] = reg + end + elseif (p == "2" and alias_done == false) then + if (last == "zero" or last == 0) then + local a1, a2 = match(altname, "([^|]*)|(.*)") + name = a2 + operands[#operands] = nil + end + end + if x then operands[#operands+1] = x; last = x end + end + return putop(ctx, name, operands) +end + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then + ofs = 0 + end + local stop = len and ofs+len or #ctx.code + --instructions can be both 32 and 16 bits + stop = stop - stop % 2 + ctx.pos = ofs - ofs % 2 + ctx.rel = nil + while ctx.pos < stop do disass_ins(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = addr or 0 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 8 + ctx.get = get_le + ctx.map_pri = map_pri + ctx.map_compr = map_compr + ctx.reltab = {} + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass(code, addr, out) + create(code, addr, out):disass(addr) +end + +-- Return register name for RID. +local function regname(r) + if r < 32 then return map_gpr[r] end + return "f"..(r-32) +end + +-- Public module functions. +return { + create = create, + disass = disass, + regname = regname +} diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua new file mode 100644 index 000000000..fd6ce2768 --- /dev/null +++ b/src/jit/dis_riscv64.lua @@ -0,0 +1,16 @@ +---------------------------------------------------------------------------- +-- LuaJIT RISC-V 64 disassembler wrapper module. +-- +-- Copyright (C) 2005-2025 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the default riscv little-endian functions from the +-- RISC-V disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") +return { + create = dis_riscv.create, + disass = dis_riscv.disass, + regname = dis_riscv.regname +} \ No newline at end of file From 52af6d8ba541f349a4e26a07ed35c24034551381 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 6 Mar 2024 09:50:08 +0800 Subject: [PATCH 20/22] riscv(misc): add support in Makefile --- Makefile | 1 + src/Makefile | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/Makefile b/Makefile index 6b67f54d2..ccd3468a3 100644 --- a/Makefile +++ b/Makefile @@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ dis_mips64.lua dis_mips64el.lua \ dis_mips64r6.lua dis_mips64r6el.lua \ + dis_riscv.lua dis_riscv64.lua \ vmdef.lua ifeq (,$(findstring Windows,$(OS))) diff --git a/src/Makefile b/src/Makefile index 44a739530..6471afeac 100644 --- a/src/Makefile +++ b/src/Makefile @@ -52,6 +52,7 @@ CCOPT_arm= CCOPT_arm64= CCOPT_ppc= CCOPT_mips= +CCOPT_riscv64= # #CCDEBUG= # Uncomment the next line to generate debug information: @@ -269,6 +270,9 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) else TARGET_LJARCH= mips endif +else +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv64 else $(error Unsupported target architecture) endif @@ -278,6 +282,7 @@ endif endif endif endif +endif ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 @@ -484,6 +489,9 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D ELFV2 endif endif +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D RISCV64 +endif endif endif From 9615a2204ab9f28c926ccdfc07e7104a849f7db9 Mon Sep 17 00:00:00 2001 From: gns Date: Wed, 21 Aug 2024 16:39:26 +0800 Subject: [PATCH 21/22] riscv(support,linux): use HWPROBE for ISE detection Current SIGILL handler appears to have weird issues with libluajit on some platform. Considering 6.6 kernel is becoming more common, switch to HWPROBE for better compatibility. --- src/lib_jit.c | 79 +++++++++++++++++++++++++-------------------------- src/lj_jit.h | 29 +++++++++++++++++++ 2 files changed, 67 insertions(+), 41 deletions(-) diff --git a/src/lib_jit.c b/src/lib_jit.c index ce972cd01..1f5c81d1d 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -698,23 +698,26 @@ JIT_PARAMDEF(JIT_PARAMINIT) #endif #if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX -#include -#include -static sigjmp_buf sigbuf = {0}; -static void detect_sigill(int sig) -{ - siglongjmp(sigbuf, 1); -} + +#if LJ_TARGET_LINUX +#include + +struct riscv_hwprobe hwprobe_requests[] = { + {RISCV_HWPROBE_KEY_IMA_EXT_0} +}; + +const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; + +int hwprobe_ret = 0; +#endif static int riscv_compressed() { #if defined(__riscv_c) || defined(__riscv_compressed) /* Don't bother checking for RVC -- would crash before getting here. */ return 1; -#elif defined(__GNUC__) - /* c.nop; c.nop; */ - __asm__(".4byte 0x00010001"); - return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; #else return 0; #endif @@ -725,11 +728,8 @@ static int riscv_zba() #if defined(__riscv_b) || defined(__riscv_zba) /* Don't bother checking for Zba -- would crash before getting here. */ return 1; -#elif defined(__GNUC__) - /* Don't bother verifying the result, just check if the instruction exists. */ - /* add.uw zero, zero, zero */ - __asm__(".4byte 0x0800003b"); - return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 1 : 0; #else return 0; #endif @@ -740,11 +740,8 @@ static int riscv_zbb() #if defined(__riscv_b) || defined(__riscv_zbb) /* Don't bother checking for Zbb -- would crash before getting here. */ return 1; -#elif defined(__GNUC__) - register int t asm ("a0"); - /* addi a0, zero, 255; sext.b a0, a0; */ - __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); - return t < 0; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; #else return 0; #endif @@ -755,10 +752,8 @@ static int riscv_zicond() #if defined(__riscv_zicond) /* Don't bother checking for Zicond -- would crash before getting here. */ return 1; -#elif defined(__GNUC__) - /* czero.eqz zero, zero, zero; */ - __asm__(".4byte 0x0e005033"); - return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0; #else return 0; #endif @@ -769,6 +764,8 @@ static int riscv_zfa() #if defined(__riscv_zfa) /* Don't bother checking for Zfa -- would crash before getting here. */ return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZFA)) ? 1 : 0; #else return 0; #endif @@ -782,23 +779,19 @@ static int riscv_xthead() && defined(__riscv_xtheadmac)) /* Don't bother checking for XThead -- would crash before getting here. */ return 1; -#elif defined(__GNUC__) - register int t asm ("a0"); - /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ - /* Therefore assume XThead* are present if XTheadBb is present. */ - /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ - __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); - return t == -1; /* In case of collision with other vendor extensions. */ #else - return 0; +/* +** Hardcoded as there's no easy way of detection: +** - SIGILL have some trouble with libluajit as we speak +** - Checking mvendorid looks good, but might not be reliable. +*/ + return 0; #endif } static uint32_t riscv_probe(int (*func)(void), uint32_t flag) { - if (sigsetjmp(sigbuf, 1) == 0) { - return func() ? flag : 0; - } else return 0; + return func() ? flag : 0; } #endif @@ -877,17 +870,21 @@ static uint32_t jit_cpudetect(void) #elif LJ_TARGET_RISCV64 #if LJ_HASJIT - /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */ - struct sigaction old = {0}, act = {0}; - act.sa_handler = detect_sigill; - sigaction(SIGILL, &act, &old); + +#if LJ_TARGET_LINUX + /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */ + hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, + sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, + NULL, 0); + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); flags |= riscv_probe(riscv_zba, JIT_F_RVZba); flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); flags |= riscv_probe(riscv_zfa, JIT_F_RVZfa); flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); - sigaction(SIGILL, &old, NULL); + +#endif /* Detect V/P? */ /* V have no hardware available, P not ratified yet. */ diff --git a/src/lj_jit.h b/src/lj_jit.h index a0296f2e7..bf1a625c3 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -79,6 +79,35 @@ #define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\003Zfa\006XThead" +#if LJ_TARGET_LINUX +#include + +#ifndef __NR_riscv_hwprobe +#ifndef __NR_arch_specific_syscall +#define __NR_arch_specific_syscall 244 +#endif +#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14) +#endif + +struct riscv_hwprobe { + int64_t key; + uint64_t value; +}; + +#define RISCV_HWPROBE_KEY_MVENDORID 0 +#define RISCV_HWPROBE_KEY_MARCHID 1 +#define RISCV_HWPROBE_KEY_MIMPID 2 +#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3 +#define RISCV_HWPROBE_KEY_IMA_EXT_0 4 + +#define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) +#define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) +#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35) + +#endif + #else #define JIT_F_CPUSTRING "" From ea214b29758c1cb4da90a15f07e67f7cf99721fd Mon Sep 17 00:00:00 2001 From: gns Date: Thu, 16 Jan 2025 01:02:19 +0800 Subject: [PATCH 22/22] riscv(interp): strip excessive extended branch (^B+J) --- src/vm_riscv64.dasc | 104 ++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc index 09ac0cf05..67f8f2c0a 100644 --- a/src/vm_riscv64.dasc +++ b/src/vm_riscv64.dasc @@ -552,7 +552,7 @@ static void build_subroutines(BuildCtx *ctx) | | // Return from pcall or xpcall fast func. | mov_true TMP1 - | bxeqz TMP0, ->cont_dispatch + | beqz TMP0, ->cont_dispatch | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. | mv BASE, TMP2 // Restore caller base. | // Prepending may overwrite the pcall frame, so do it at the end. @@ -563,9 +563,9 @@ static void build_subroutines(BuildCtx *ctx) | addiw RD, RD, 8 // RD = (nresults+1)*8. | andi TMP0, PC, FRAME_TYPE | li CRET1, LUA_YIELD - | bxeqz RD, ->vm_unwind_c_eh + | beqz RD, ->vm_unwind_c_eh | mv MULTRES, RD - | bxeqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. | |->vm_return: | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return @@ -573,7 +573,7 @@ static void build_subroutines(BuildCtx *ctx) | andi TMP2, PC, ~FRAME_TYPEP | xori TMP0, TMP0, FRAME_C | sub TMP2, BASE, TMP2 // TMP2 = previous base. - | bxnez TMP0, ->vm_returnp + | bnez TMP0, ->vm_returnp | | addiw TMP1, RD, -8 | sd TMP2, L->base @@ -742,7 +742,7 @@ static void build_subroutines(BuildCtx *ctx) | andi TMP0, PC, FRAME_TYPE | li TISNIL, LJ_TNIL | li TISNUM, LJ_TISNUM - | bxeqz TMP0, ->BC_RET_Z + | beqz TMP0, ->BC_RET_Z | j ->vm_return | |->vm_pcall: // Setup protected C frame and enter VM. @@ -1002,7 +1002,7 @@ static void build_subroutines(BuildCtx *ctx) | // Returns 0/1 or TValue * (metamethod). |3: | sltiu TMP1, CRET1, 2 - | bxeqz TMP1, ->vmeta_binop + | beqz TMP1, ->vmeta_binop | negw TMP2, CRET1 |4: | lhu RD, OFS_RD(PC) @@ -1090,7 +1090,7 @@ static void build_subroutines(BuildCtx *ctx) | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | call_intern vmeta_arith, lj_meta_arith | // Returns NULL (finished) or TValue * (metamethod). - | bxeqz CRET1, ->cont_nop + | beqz CRET1, ->cont_nop | | // Call metamethod for binary op. |->vmeta_binop: @@ -1114,7 +1114,7 @@ static void build_subroutines(BuildCtx *ctx) | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) | // Returns NULL (retry) or TValue * (metamethod base). #if LJ_52 - | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. + | bnez CRET1, ->vmeta_binop // Binop call for compatibility. | mv CARG1, MULTRES | j ->BC_LEN_Z #else @@ -1200,7 +1200,7 @@ static void build_subroutines(BuildCtx *ctx) |->ff_ .. name: | ld CARG1, 0(BASE) | fld FARG1, 0(BASE) - | bxeqz NARGS8:RC, ->fff_fallback + | beqz NARGS8:RC, ->fff_fallback | checknum CARG1, ->fff_fallback |.endmacro | @@ -1209,7 +1209,7 @@ static void build_subroutines(BuildCtx *ctx) | ld CARG1, 0(BASE) | sltiu TMP0, NARGS8:RC, 16 | ld CARG2, 8(BASE) - | bxnez TMP0, ->fff_fallback + | bnez TMP0, ->fff_fallback | gettp TMP1, CARG1 | gettp TMP2, CARG2 | sltiu TMP1, TMP1, LJ_TISNUM @@ -1217,7 +1217,7 @@ static void build_subroutines(BuildCtx *ctx) | fld FARG1, 0(BASE) | and TMP1, TMP1, TMP2 | fld FARG2, 8(BASE) - | bxeqz TMP1, ->fff_fallback + | beqz TMP1, ->fff_fallback |.endmacro | |// Inlined GC threshold check. @@ -1277,7 +1277,7 @@ static void build_subroutines(BuildCtx *ctx) |2: | ld STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] | li CARG1, LJ_TNIL - | bxeqz TAB:RB, ->fff_restv + | beqz TAB:RB, ->fff_restv | lw TMP0, TAB:RB->hmask | lw TMP1, STR:RC->sid | ld NODE:TMP2, TAB:RB->node @@ -1299,7 +1299,7 @@ static void build_subroutines(BuildCtx *ctx) | settp CARG1, RB, TMP3 | j ->fff_restv // Not found, keep default result. |5: - | bxne CARG1, TISNIL, ->fff_restv + | bne CARG1, TISNIL, ->fff_restv | j <4 // Ditto for nil value. | |6: @@ -1325,7 +1325,7 @@ static void build_subroutines(BuildCtx *ctx) | bxnez TMP3, ->fff_fallback | andi TMP3, TMP2, LJ_GC_BLACK // isblack(table) | sd TAB:CARG2, TAB:TMP1->metatable - | bxeqz TMP3, ->fff_restv + | beqz TMP3, ->fff_restv | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv | |.ffunc rawget @@ -1360,7 +1360,7 @@ static void build_subroutines(BuildCtx *ctx) | gettp TMP0, CARG1 | addi TMP1, TMP0, -LJ_TSTR | // A __tostring method in the string base metatable is ignored. - | bxeqz TMP1, ->fff_restv // String key? + | beqz TMP1, ->fff_restv // String key? | // Handle numbers inline, unless a number base metatable is present. | ld TMP1, GL->gcroot[GCROOT_BASEMT_NUM] | sltu TMP0, TISNUM, TMP0 @@ -1390,10 +1390,10 @@ static void build_subroutines(BuildCtx *ctx) | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) | // Returns 1=found, 0=end, -1=error. | li RD, (2+1)*8 - | bxgtz CRET1, ->fff_res // Found key/value. + | bgtz CRET1, ->fff_res // Found key/value. | mv TMP1, CRET1 | mv CARG1, TISNIL - | bxeqz TMP1, ->fff_restv // End of traversal: return nil. + | beqz TMP1, ->fff_restv // End of traversal: return nil. | ld CFUNC:RB, FRAME_FUNC(BASE) | li RC, 2*8 | cleartp CFUNC:RB @@ -1433,19 +1433,19 @@ static void build_subroutines(BuildCtx *ctx) | ld TMP1, 0(TMP3) |1: | li RD, (0+1)*8 - | bxeq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. | sd TMP1, -8(BASE) | li RD, (2+1)*8 | j ->fff_res |2: // Check for empty hash part first. Otherwise call C function. | lw TMP0, TAB:CARG1->hmask | li RD, (0+1)*8 - | bxeqz TMP0, ->fff_res + | beqz TMP0, ->fff_res | mv CARG2, TMP2 | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) | // Returns cTValue * or NULL. | li RD, (0+1)*8 - | bxeqz CRET1, ->fff_res + | beqz CRET1, ->fff_res | ld TMP1, 0(CRET1) | j <1 | @@ -1482,7 +1482,7 @@ static void build_subroutines(BuildCtx *ctx) | srliw TMP3, TMP3, HOOK_ACTIVE_SHIFT | andi TMP3, TMP3, 1 | addi PC, TMP3, 16+FRAME_PCALL - | bxeqz NARGS8:RC, ->vm_call_dispatch + | beqz NARGS8:RC, ->vm_call_dispatch |1: | add TMP0, BASE, NARGS8:RC |2: @@ -1539,17 +1539,17 @@ static void build_subroutines(BuildCtx *ctx) | xor CARG2, CARG2, TMP3 // CARG2 = TMP4 ? CARG2 : TMP3 | and CARG2, CARG2, TMP4 | xor CARG2, TMP3, CARG2 - | bxgtz CARG4, ->fff_fallback // st > LUA_YIELD? + | bgtz CARG4, ->fff_fallback // st > LUA_YIELD? | xor TMP2, TMP2, CARG3 | or CARG4, TMP2, TMP0 - | bxnez TMP1, ->fff_fallback // cframe != 0? + | bnez TMP1, ->fff_fallback // cframe != 0? | ld TMP0, L:CARG1->maxstack | ld PC, FRAME_PC(BASE) - | bxeqz CARG4, ->fff_fallback // base == top && st == 0? + | beqz CARG4, ->fff_fallback // base == top && st == 0? | add TMP2, CARG2, NARGS8:RC | sd BASE, L->base | sd PC, SAVE_PC(sp) - | bxltu TMP0, TMP2, ->fff_fallback // Stack overflow? + | bltu TMP0, TMP2, ->fff_fallback // Stack overflow? |1: |.if resume | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. @@ -1654,7 +1654,7 @@ static void build_subroutines(BuildCtx *ctx) | sd BASE, L->base | andi TMP0, TMP0, CFRAME_RESUME | sd TMP1, L->top - | bxeqz TMP0, ->fff_fallback + | beqz TMP0, ->fff_fallback | sd x0, L->cframe | sb CRET1, L->status | j ->vm_leave_unw @@ -1665,14 +1665,14 @@ static void build_subroutines(BuildCtx *ctx) |->ff_math_ .. func: | ld CARG1, 0(BASE) | gettp TMP0, CARG1 - | bxeqz NARGS8:RC, ->fff_fallback + | beqz NARGS8:RC, ->fff_fallback | fmv.d.x FARG1, CARG1 - | bxeq TMP0, TISNUM, ->fff_restv + | beq TMP0, TISNUM, ->fff_restv | srli TMP1, CARG1, 52 // Extract exponent (and sign). - | bxgeu TMP0, TISNUM, ->fff_fallback + | bgeu TMP0, TISNUM, ->fff_fallback | andi TMP1, TMP1, 0x7ff // Extract exponent. | slti TMP2, TMP1, 1023 + 52 + 1 // 1023: Bias, 52: Max fraction - | bxeqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? + | beqz TMP2, ->fff_resn // Less than 2^52 / Not NaN? | fcvt.l.d TMP3, FARG1, rm | fcvt.d.l FTMP1, TMP3 | fsgnj.d FRET1, FTMP1, FARG1 @@ -1692,7 +1692,7 @@ static void build_subroutines(BuildCtx *ctx) | sub CARG1, TMP1, TMP0 | slli TMP3, CARG1, 32 | settp CARG1, TISNUM - | bxgez TMP3, ->fff_restv + | bgez TMP3, ->fff_restv | lui CARG1, 0x41e00 // 2^31 as a double. | slli CARG1, CARG1, 32 | j ->fff_restv @@ -1700,7 +1700,7 @@ static void build_subroutines(BuildCtx *ctx) | sltiu TMP2, CARG2, LJ_TISNUM | slli CARG1, CARG1, 1 | srli CARG1, CARG1, 1 - | bxeqz TMP2, ->fff_fallback // int + | beqz TMP2, ->fff_fallback // int |// fallthrough | |->fff_restv: @@ -1756,7 +1756,7 @@ static void build_subroutines(BuildCtx *ctx) | li TMP1, 8 | ld CARG1, 0(BASE) | fld FARG1, 0(BASE) - | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. + | bne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. | checknum CARG1, ->fff_fallback | call_extern ff_math_log, log | j ->fff_resn @@ -1810,7 +1810,7 @@ static void build_subroutines(BuildCtx *ctx) | checkint CARG1, >4 |1: // Handle integers. | ld CARG2, 0(RA) - | bxeq RA, RB, ->fff_restv + | beq RA, RB, ->fff_restv | sext.w CARG1, CARG1 | checkint CARG2, >3 | sext.w CARG2, CARG2 @@ -1839,7 +1839,7 @@ static void build_subroutines(BuildCtx *ctx) |5: // Handle numbers. | ld CARG2, 0(RA) | fld FARG2, 0(RA) - | bxgeu RA, RB, ->fff_resn + | bgeu RA, RB, ->fff_resn | checknum CARG2, >7 |6: |.if ismax @@ -1870,7 +1870,7 @@ static void build_subroutines(BuildCtx *ctx) | addi TMP0, TMP0, -LJ_TSTR | or TMP1, TMP1, TMP0 | cleartp STR:CARG1 - | bxnez TMP1, ->fff_fallback // Need exactly 1 string argument. + | bnez TMP1, ->fff_fallback // Need exactly 1 string argument. | lw TMP0, STR:CARG1->len | ld PC, FRAME_PC(BASE) | snez RD, TMP0 @@ -1893,7 +1893,7 @@ static void build_subroutines(BuildCtx *ctx) | sltu TMP2, TMP2, CARG1 // !(255 < n). | or TMP1, TMP1, TMP2 | li CARG3, 1 - | bxnez TMP1, ->fff_fallback + | bnez TMP1, ->fff_fallback | addi CARG2, sp, TMPD_OFS | sb CARG1, TMPD(sp) |->fff_newstr: @@ -1916,7 +1916,7 @@ static void build_subroutines(BuildCtx *ctx) | ld CARG3, 16(BASE) | addi TMP0, NARGS8:RC, -16 | gettp TMP1, CARG1 - | bxltz TMP0, ->fff_fallback + | bltz TMP0, ->fff_fallback | cleartp STR:CARG1, CARG1 | li CARG4, -1 | beqz TMP0, >1 @@ -1926,7 +1926,7 @@ static void build_subroutines(BuildCtx *ctx) | checkint CARG2, ->fff_fallback | addi TMP0, TMP1, -LJ_TSTR | sext.w CARG3, CARG2 - | bxnez TMP0, ->fff_fallback + | bnez TMP0, ->fff_fallback | lw CARG2, STR:CARG1->len | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | addiw TMP0, CARG2, 1 @@ -1949,7 +1949,7 @@ static void build_subroutines(BuildCtx *ctx) | sub CARG3, CARG4, CARG3 // len = end - start | addi CARG2, CARG2, sizeof(GCstr)-1 | addiw CARG3, CARG3, 1 // len += 1 - | bxgez CARG3, ->fff_newstr + | bgez CARG3, ->fff_newstr |->fff_emptystr: // Return empty string. | li TMP1, LJ_TSTR | addi STR:CARG1, GL, offsetof(global_State, strempty) @@ -1960,7 +1960,7 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc string_ .. name | ffgccheck | ld CARG2, 0(BASE) - | bxeqz NARGS8:RC, ->fff_fallback + | beqz NARGS8:RC, ->fff_fallback | checkstr STR:CARG2, ->fff_fallback | addi SBUF:CARG1, GL, offsetof(global_State, tmpbuf) | ld TMP0, SBUF:CARG1->b @@ -1982,7 +1982,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_tobit_fb: | fld FARG1, 0(BASE) - | bxeqz TMP1, ->fff_fallback + | beqz TMP1, ->fff_fallback | fadd.d FARG1, FARG1, TOBIT | fmv.x.w CRET1, FARG1 | zext.w CRET1, CRET1 @@ -2004,7 +2004,7 @@ static void build_subroutines(BuildCtx *ctx) | add TMP3, BASE, NARGS8:RC |1: | ld TMP1, 0(TMP2) - | bxeq TMP2, TMP3, ->fff_resi + | beq TMP2, TMP3, ->fff_resi | gettp TMP0, TMP1 | addi TMP2, TMP2, 8 | bne TMP0, TISNUM, >2 @@ -2015,7 +2015,7 @@ static void build_subroutines(BuildCtx *ctx) | fld FARG1, -8(TMP2) | sltiu TMP0, TMP0, LJ_TISNUM | fadd.d FARG1, FARG1, TOBIT - | bxeqz TMP0, ->fff_fallback + | beqz TMP0, ->fff_fallback | fmv.x.w TMP1, FARG1 | zext.w TMP1, TMP1 | bins CRET1, CRET1, TMP1 @@ -2063,7 +2063,7 @@ static void build_subroutines(BuildCtx *ctx) |1: | gettp TMP0, CARG2 | zext.w CARG2, CARG2 - | bxne TMP0, TISNUM, ->fff_fallback + | bne TMP0, TISNUM, ->fff_fallback | sext.w CARG1, CARG1 | shins CRET1, CARG1, CARG2 | zext.w CRET1, CRET1 @@ -2084,7 +2084,7 @@ static void build_subroutines(BuildCtx *ctx) |1: | gettp TMP0, CARG2 | zext.w CARG2, CARG2 - | bxne TMP0, TISNUM, ->fff_fallback + | bne TMP0, TISNUM, ->fff_fallback | sext.w CARG1, CARG1 | neg TMP2, CARG2 | rotinsa TMP1, CARG1, CARG2 @@ -2115,13 +2115,13 @@ static void build_subroutines(BuildCtx *ctx) | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | ld BASE, L->base | slliw RD, CRET1, 3 - | bxgtz CRET1, ->fff_res // Returned nresults+1? + | bgtz CRET1, ->fff_res // Returned nresults+1? |1: // Returned 0 or -1: retry fast path. | ld LFUNC:RB, FRAME_FUNC(BASE) | ld TMP0, L->top | sub NARGS8:RC, TMP0, BASE | cleartp LFUNC:RB - | bxnez CRET1, ->vm_call_tail // Returned -1? + | bnez CRET1, ->vm_call_tail // Returned -1? | ins_callt // Returned 0: retry fast path. | |// Reconstruct previous base for vmeta_call during tailcall. @@ -4296,7 +4296,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |->BC_RETV_Z: // Non-standard return case. | andi TMP2, TMP1, FRAME_TYPEP - | bxnez TMP2, ->vm_return + | bnez TMP2, ->vm_return | // Return from vararg function: relocate BASE down. | sub BASE, BASE, TMP1 | ld PC, FRAME_PC(BASE) @@ -4549,7 +4549,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ld TMP2, L->maxstack | lbu TMP1, -4+PC2PROTO(numparams)(PC) | ld KBASE, -4+PC2PROTO(k)(PC) - | bxltu TMP2, RA, ->vm_growstack_l + | bltu TMP2, RA, ->vm_growstack_l | slliw TMP1, TMP1, 3 // numparams*8 |2: | bltu NARGS8:RC, TMP1, >3 // Check for missing parameters. @@ -4586,7 +4586,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addi TMP3, RC, 16+FRAME_VARG | ld KBASE, -4+PC2PROTO(k)(PC) | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. - | bxgeu TMP0, TMP2, ->vm_growstack_l + | bgeu TMP0, TMP2, ->vm_growstack_l | lbu TMP2, -4+PC2PROTO(numparams)(PC) | mv RA, BASE | mv RC, TMP1 @@ -4632,7 +4632,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add RC, BASE, NARGS8:RC | sd BASE, L->base // base of currently excuting function | sd RC, L->top - | bxgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. + | bgtu TMP1, TMP2, ->vm_growstack_c // Need to grow stack. | li_vmstate C // li TMP0, ~LJ_VMST_C if (op == BC_FUNCCW) { | ld CARG2, CFUNC:RB->f