From 2c0893337e967698c12b298207e5b2c76839738e Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Mon, 25 Mar 2024 21:49:26 +0000 Subject: [PATCH] JIT: Remove remaining SVE references from emitarm64.cpp (#100100) --- src/coreclr/jit/emit.h | 4 +- src/coreclr/jit/emitarm64.cpp | 31123 ++++++++++++----------------- src/coreclr/jit/emitarm64.h | 9 + src/coreclr/jit/emitarm64sve.cpp | 5875 ++++++ 4 files changed, 18497 insertions(+), 18514 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 54f37632cc930a..48f5edeef728b3 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3103,8 +3103,8 @@ class emitter #ifdef DEBUG #ifndef TARGET_LOONGARCH64 void emitInsSanityCheck(instrDesc* id); -#endif -#endif +#endif // TARGET_LOONGARCH64 +#endif // DEBUG #ifdef TARGET_ARMARCH // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 31ca66562cfef5..3c4c76b70ab483 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -167,7 +167,7 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const #ifdef DEBUG /***************************************************************************** * - * The following called for each recorded instruction -- use for debugging. + * The following is called for each recorded instruction -- use for debugging. */ void emitter::emitInsSanityCheck(instrDesc* id) { @@ -951,8491 +951,2711 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(datasize == EA_8BYTE); break; - case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // NNNN + default: + // fallback to check SVE instructions. + emitInsSveSanityCheck(id); break; + } +} +#endif // DEBUG - // Scalable. - case IF_SVE_AA_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left - // (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; +bool emitter::emitInsMayWriteToGCReg(instrDesc* id) +{ + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); - // Scalable, .S or .D. - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) - case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; + switch (fmt) + { - // Scalable, Merge or Zero predicate. 
- case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // nnnnn - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // ddddd - assert(isScalableVectorSize(elemsize)); - break; + // These are the formats with "destination" registers: - // Scalable, with shift immediate. - case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isValidVectorShiftAmount(emitGetInsSC(id), optGetSveElemsize(id->idInsOpt()), true)); - assert(isScalableVectorSize(elemsize)); - break; + case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) + case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) + case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 - // Scalable Wide. - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableWide(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - // Scalable to/from SIMD scalar. - case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) - case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_CP_3A: // ........xx...... 
...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector - // (predicated) - case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(elemsize)); - break; + case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) + case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63) + case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) + case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s) - // Scalable to FP SIMD scalar. - case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsizeSveFloat(elemsize)); - break; + case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond - // Scalable to general register. - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - case IF_SVE_CS_3A: // ........xx...... 
...gggnnnnnddddd -- SVE extract element to general register - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isGeneralRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidScalarDatasize(elemsize)); - break; - - // Scalable, 4 regs (location of reg3 and reg4 can switch) - case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend - // (predicated) - case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand - // (predicated) - case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector) - case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); - assert(isVectorRegister(id->idReg4())); - assert(isScalableVectorSize(elemsize)); - break; - - // Scalable, unpredicated - case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high - // (unpredicated) - case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) - case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient - case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments - case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - case 
IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads - case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) - case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long - case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high - case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long - case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long - case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp - case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) - case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long - case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide - case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long - case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved - case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute - case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long - case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate - case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long - case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part - case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) - case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic 
(unpredicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; + case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond + case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm + case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) + case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn + case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn - // Scalable, no predicates. General purpose source registers - case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register - // increment) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg3())); // mmmmm - assert(isValidScalarDatasize(elemsize)); - break; + case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm + case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) + case IF_DR_3C: // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) + case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond + case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) - case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation - assert(id->idInsOpt() == INS_OPTS_SCALABLE_S || id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<2>(emitGetInsSC(id))); // hh - break; + case IF_DR_4A: // DR_4A X..........mmmmm 
.aaaaannnnnddddd Rd Rn Rm Ra - case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D_SXTW || id->idInsOpt() == INS_OPTS_SCALABLE_D_UXTW); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<2>(emitGetInsSC(id))); // hh - break; + case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov - to general) + case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general) + return true; - case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count - case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_NONE); - assert(isGeneralRegister(id->idReg1())); - assert(elemsize == EA_8BYTE); - assert(isValidUimmFrom1<4>(emitGetInsSC(id))); - break; + case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) + case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) + case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) + case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) + case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register) + case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general) + case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) + case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) + case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) + case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) + case IF_DV_2P: // DV_2P ................ 
......nnnnnddddd Vd Vn (aes*, sha1su1) - Vd both source and + // destination - case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count - case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isScalableVectorSize(elemsize)); - assert(isValidUimmFrom1<4>(emitGetInsSC(id))); - break; + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) - case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) - case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate - imm = emitGetInsSC(id); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidImmNRS(imm, optGetSveElemsize(id->idInsOpt()))); - break; + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector) + case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) + case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) + case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + case IF_DV_3F: // DV_3F 
.Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) + // Tracked GC pointers cannot be placed into the SIMD registers. + return false; - case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_NONE); - assert(isGeneralRegister(id->idReg1())); - assert(isValidGeneralDatasize(elemsize)); - assert(isValidUimmFrom1<4>(emitGetInsSC(id))); - break; + // These are the load/store formats with "target" registers: - case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) - case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidUimm<8>(emitGetInsSC(id))); // iiiii iii - break; + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn + case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) + case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc + case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn + case IF_LS_2E: // LS_2E .Q.............. ....ssnnnnnttttt Vt Rn + case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn + case IF_LS_2G: // LS_2G .Q.............. xx.Sssnnnnnttttt Vt[] Rn + case IF_LS_3A: // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} + case IF_LS_3B: // LS_3B X............... 
.aaaaannnnnttttt Rt Ra Rn + case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh) + case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn + case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm + case IF_LS_3G: // LS_3G .Q.........mmmmm ...Sssnnnnnttttt Vt[] Rn Rm - case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) - { - imm = emitGetInsSC(id); - floatImm8 fpImm; - fpImm.immFPIVal = (unsigned)imm; - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm<8>((ssize_t)emitDecodeFloatImm8(fpImm))); // iiiiiiii - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; - } + // For the Store instructions the "target" register is actually a "source" value - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - imm = emitGetInsSC(id); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - assert(isValidSimm<8>(imm)); // iiiiiiii - break; + if (emitInsIsStore(ins)) + { + return false; + } + else + { + assert(emitInsIsLoad(ins)); + return true; + } - case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // gggg - break; + case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics + // ARMv8.1 Atomics + assert(emitInsIsStore(ins)); + assert(emitInsIsLoad(ins)); + return true; 
- case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isVectorRegister(id->idReg2())); // nnnnn - break; + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) + return ins == INS_mrs_tpid0; - case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidUimm<3>(emitGetInsSC(id))); - break; + default: + return false; + } +} - case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidUimm<1>(emitGetInsSC(id))); // i - break; +bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; - case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidUimm<3>(emitGetInsSC(id))); // ii - break; + instruction ins = id->idIns(); - case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // NNNN - break; + // This list is related to the list of instructions used to store local vars in emitIns_S_R(). + // We don't accept writing to float local vars. - case IF_SVE_CF_2B: // .........i...ii. 
.......NNNNddddd -- SVE move predicate into vector - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // NNNN - assert(isValidUimm<3>(emitGetInsSC(id))); - break; + switch (ins) + { + case INS_strb: + case INS_strh: + case INS_str: + case INS_stur: + case INS_sturb: + case INS_sturh: + return true; + default: + return false; + } +} - case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // NNNN - assert(isValidUimm<1>(emitGetInsSC(id))); // i - break; +bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id) +{ + if (!id->idIsLclVar()) + return false; - case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // NNNN - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - break; + instruction ins = id->idIns(); - case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // mmmmm - break; + // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R(). + // We don't accept writing to float local vars. - case IF_SVE_CD_2A: // ........xx...... 
......mmmmmddddd -- SVE insert general register - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isGeneralRegisterOrZR(id->idReg2())); // mmmmm - break; + switch (ins) + { + case INS_stnp: + case INS_stp: + return true; + default: + return false; + } +} - case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // NNNN - assert(isPredicateRegister(id->idReg3())); // MMMM - break; +bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id) +{ + instruction ins = id->idIns(); - case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // NNNN - break; + switch (ins) + { + case INS_ldp: + case INS_ldpsw: + case INS_ldnp: + return true; + default: + return false; + } +} - case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - break; +// Takes an instrDesc 'id' and uses the instruction 'ins' to determine the +// size of the target register that is written or read by the instruction. +// Note that even if EA_4BYTE is returned a load instruction will still +// always zero the upper 4 bytes of the target register. +// This method is required so that we can distinguish between loads that are +// sign-extending as they can have two different sizes for their target register. 
+// Additionally for instructions like 'ldr' and 'str' these can load/store +// either 4 byte or 8 bytes to/from the target register. +// By convention the small unsigned load instructions are considered to write +// a 4 byte sized target register, though since these also zero the upper 4 bytes +// they could equally be considered to write the unsigned value to full 8 byte register. +// +emitAttr emitter::emitInsTargetRegSize(instrDesc* id) +{ + instruction ins = id->idIns(); + emitAttr result = EA_UNKNOWN; - // Scalable, 4 regs, to predicate register. - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm + // This is used to determine the size of the target registers for a load/store instruction + + switch (ins) + { + case INS_ldxrb: + case INS_ldarb: + case INS_ldaprb: + case INS_ldaxrb: + case INS_stxrb: + case INS_stlrb: + case INS_stlxrb: + case INS_ldrb: + case INS_strb: + case INS_ldurb: + case INS_ldapurb: + case INS_sturb: + case INS_stlurb: + result = EA_4BYTE; break; - case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableWide(id->idInsOpt())); // xx - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm + case INS_ldxrh: + case INS_ldarh: + case INS_ldaprh: + case INS_ldaxrh: + case INS_stxrh: + case INS_stlrh: + case INS_stlxrh: + case INS_ldrh: + case INS_strh: + case INS_ldurh: + case INS_sturh: + case 
INS_ldapurh: + case INS_stlurh: + result = EA_4BYTE; break; - case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isValidSimm<5>(emitGetInsSC(id))); // iiiii - break; - - case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isValidUimm<7>(emitGetInsSC(id))); // iiiii - break; - - case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments - case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long - case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate - case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) - case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product - case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product - case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations - case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long - case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 
multiply-add long long - case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long - case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) - case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations - assert(insOptsScalable(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm - assert(isVectorRegister(id->idReg3())); // mmmmm/aaaaa - break; - - case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) - case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) - assert(insOptsNone(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn/aaaaa - assert(isVectorRegister(id->idReg3())); // mmmmm - break; - - case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) - case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) - case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer 
multiply-add (indexed) - case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - break; - - case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_GY_3A: // 
...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); - assert(isValidUimm<3>(emitGetInsSC(id))); // iii - break; - - case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isLowVectorRegister(id->idReg3())); // mmmm - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - break; - - case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) - case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - 
assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isLowVectorRegister(id->idReg3())); // mmmm - assert(isValidUimm<1>(emitGetInsSC(id))); // i - break; - - case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isPredicateRegister(id->idReg3())); // NNNN - - switch (id->idIns()) - { - case INS_sve_and: - case INS_sve_ands: - case INS_sve_bic: - case INS_sve_bics: - case INS_sve_eor: - case INS_sve_eors: - case INS_sve_nand: - case INS_sve_nands: - case INS_sve_nor: - case INS_sve_nors: - case INS_sve_orn: - case INS_sve_orns: - case INS_sve_orr: - case INS_sve_orrs: - case INS_sve_sel: - assert(isPredicateRegister(id->idReg4())); // MMMM - break; - - case INS_sve_mov: - case INS_sve_movs: - case INS_sve_not: - case INS_sve_nots: - // no fourth register - break; - - default: - unreached(); - break; - } - break; - - case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // NNNN + case INS_ldrsb: + case INS_ldursb: + case INS_ldrsh: + case INS_ldursh: + if (id->idOpSize() == EA_8BYTE) + result = EA_8BYTE; + else + result = EA_4BYTE; break; - case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition - case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition - case IF_SVE_DC_3A: // ................ 
..gggg.NNNN.MMMM -- SVE propagate break to next partition - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isPredicateRegister(id->idReg3())); // NNNN + case INS_ldrsw: + case INS_ldursw: + case INS_ldpsw: + result = EA_8BYTE; break; - case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isPredicateRegister(id->idReg3())); // NNNN - assert(isPredicateRegister(id->idReg4())); // MMMM + case INS_ldp: + case INS_stp: + case INS_ldnp: + case INS_stnp: + result = id->idOpSize(); break; - case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active - case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated) - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // gggg + case INS_ldxr: + case INS_ldar: + case INS_ldapr: + case INS_ldaxr: + case INS_stxr: + case INS_stlr: + case INS_stlxr: + case INS_ldr: + case INS_str: + case INS_ldur: + case INS_stur: + case INS_ldapur: + case INS_stlur: + result = id->idOpSize(); break; - case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(insOptsScalableStandard(id->idInsOpt())); // xx + default: + NO_WAY("unexpected instruction"); break; + } + return result; +} - case IF_SVE_DF_2A: // ........xx...... 
.......VVVV.DDDD -- SVE predicate next active - case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // gggg - break; +// Takes an instrDesc and uses the instruction to determine the 'size' of the +// data that is loaded from memory. +// +emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) +{ + instruction ins = id->idIns(); + emitAttr result = EA_UNKNOWN; - case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) - case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // DDDD - break; + // The 'result' returned is the 'size' of the data that is loaded from memory. - case IF_SVE_DK_3A: // ........xx...... 
..gggg.NNNNddddd -- SVE predicate count - assert(id->idOpSize() == EA_8BYTE); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isGeneralRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // gggg - assert(isPredicateRegister(id->idReg3())); // NNNN + switch (ins) + { + case INS_ldarb: + case INS_ldaprb: + case INS_stlrb: + case INS_ldrb: + case INS_strb: + case INS_ldurb: + case INS_ldapurb: + case INS_sturb: + case INS_stlurb: + case INS_ldrsb: + case INS_ldursb: + result = EA_1BYTE; break; - case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableAtMaxHalf(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm + case INS_ldarh: + case INS_ldaprh: + case INS_stlrh: + case INS_ldrh: + case INS_strh: + case INS_ldurh: + case INS_sturh: + case INS_ldrsh: + case INS_ldursh: + case INS_ldapurh: + case INS_stlurh: + result = EA_2BYTE; break; - case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements - switch (id->idIns()) - { - case INS_sve_fcvtnt: - case INS_sve_fcvtlt: - assert(insOptsConvertFloatStepwise(id->idInsOpt())); - FALLTHROUGH; - case INS_sve_fcvtxnt: - case INS_sve_bfcvtnt: - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - break; - default: - assert(!"unreachable"); - break; - } + case INS_ldrsw: + case INS_ldursw: + case INS_ldpsw: + result = EA_4BYTE; break; - case IF_SVE_HO_3A: // ................ 
...gggnnnnnddddd -- SVE floating-point convert precision - assert(id->idInsOpt() == INS_OPTS_S_TO_H); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn + case INS_ldp: + case INS_stp: + case INS_ldnp: + case INS_stnp: + result = id->idOpSize(); break; - case IF_SVE_HO_3B: - assert(insOptsConvertFloatToFloat(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn + case INS_ldar: + case INS_ldapr: + case INS_stlr: + case INS_ldr: + case INS_str: + case INS_ldur: + case INS_stur: + case INS_ldapur: + case INS_stlur: + result = id->idOpSize(); break; - case IF_SVE_HO_3C: - assert(id->idInsOpt() == INS_OPTS_D_TO_S); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn + default: + NO_WAY("unexpected instruction"); break; + } + return result; +} - case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer - assert(insOptsScalableFloat(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_H_TO_S || - id->idInsOpt() == INS_OPTS_H_TO_D || id->idInsOpt() == INS_OPTS_S_TO_D || - id->idInsOpt() == INS_OPTS_D_TO_S); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - break; +/*****************************************************************************/ - case IF_SVE_HS_3A: // ................ 
...gggnnnnnddddd -- SVE integer convert to floating-point - assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_S_TO_H || - id->idInsOpt() == INS_OPTS_S_TO_D || id->idInsOpt() == INS_OPTS_D_TO_H || - id->idInsOpt() == INS_OPTS_D_TO_S); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - break; +// clang-format off +static const char * const xRegNames[] = +{ + #define REGDEF(name, rnum, mask, xname, wname) xname, + #include "register.h" +}; - case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableFloat(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm - break; - - // Scalable FP. - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations - elemsize = id->idOpSize(); - assert(insOptsScalableFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; +static const char * const wRegNames[] = +{ + #define REGDEF(name, rnum, mask, xname, wname) wname, + #include "register.h" +}; - case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_HL_3B: // ................ 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; +static const char * const vRegNames[] = +{ + "v0", "v1", "v2", "v3", "v4", + "v5", "v6", "v7", "v8", "v9", + "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", + "v20", "v21", "v22", "v23", "v24", + "v25", "v26", "v27", "v28", "v29", + "v30", "v31" +}; - // Scalable to Simd Vector. - case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) - case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) - case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) - datasize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(datasize == EA_8BYTE); - break; +static const char * const qRegNames[] = +{ + "q0", "q1", "q2", "q3", "q4", + "q5", "q6", "q7", "q8", "q9", + "q10", "q11", "q12", "q13", "q14", + "q15", "q16", "q17", "q18", "q19", + "q20", "q21", "q22", "q23", "q24", + "q25", "q26", "q27", "q28", "q29", + "q30", "q31" +}; - // Scalable FP to Simd Vector. - case IF_SVE_GS_3A: // ........xx...... 
...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) - datasize = id->idOpSize(); - assert(insOptsScalableFloat(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(datasize == EA_8BYTE); - break; +static const char * const hRegNames[] = +{ + "h0", "h1", "h2", "h3", "h4", + "h5", "h6", "h7", "h8", "h9", + "h10", "h11", "h12", "h13", "h14", + "h15", "h16", "h17", "h18", "h19", + "h20", "h21", "h22", "h23", "h24", + "h25", "h26", "h27", "h28", "h29", + "h30", "h31" +}; +static const char * const bRegNames[] = +{ + "b0", "b1", "b2", "b3", "b4", + "b5", "b6", "b7", "b8", "b9", + "b10", "b11", "b12", "b13", "b14", + "b15", "b16", "b17", "b18", "b19", + "b20", "b21", "b22", "b23", "b24", + "b25", "b26", "b27", "b28", "b29", + "b30", "b31" +}; - // Scalable, widening to scalar SIMD. - case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableWide(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsizeWidening(elemsize)); - break; +// clang-format on - // Scalable, possibly FP. - case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) - switch (id->idIns()) - { - case INS_sve_fabs: - case INS_sve_fneg: - assert(insOptsScalableFloat(id->idInsOpt())); // xx - break; +//------------------------------------------------------------------------ +// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. +// +// Arguments: +// reg - A general-purpose register or SIMD and floating-point register. +// size - A register size. +// varName - unused parameter. 
+// +// Return value: +// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. +// +const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) const +{ + assert(reg < REG_COUNT); - default: - assert(insOptsScalableStandard(id->idInsOpt())); // xx - break; - } - elemsize = id->idOpSize(); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; + const char* rn = nullptr; - // Scalable, various sizes. - case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) - case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements - switch (id->idIns()) - { - case INS_sve_abs: - case INS_sve_neg: - case INS_sve_rbit: - assert(insOptsScalableStandard(id->idInsOpt())); - break; + if (size == EA_8BYTE) + { + rn = xRegNames[reg]; + } + else if (size == EA_4BYTE) + { + rn = wRegNames[reg]; + } + else if (isVectorRegister(reg)) + { + if (size == EA_16BYTE) + { + rn = qRegNames[reg - REG_V0]; + } + else if (size == EA_2BYTE) + { + rn = hRegNames[reg - REG_V0]; + } + else if (size == EA_1BYTE) + { + rn = bRegNames[reg - REG_V0]; + } + else if (size == EA_SCALABLE) + { + rn = emitSveRegName(reg); + } + } - case INS_sve_sxtb: - case INS_sve_uxtb: - case INS_sve_revb: - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - break; + assert(rn != nullptr); - case INS_sve_sxth: - case INS_sve_uxth: - case INS_sve_revh: - assert(insOptsScalableWords(id->idInsOpt())); - break; + return rn; +} - default: - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - break; - } - elemsize = id->idOpSize(); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case 
IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) - case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive) - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); // xx - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // VVV - assert(isVectorRegister(id->idReg3())); // nnnnn - break; - - case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) - elemsize = id->idOpSize(); - assert(isScalableVectorSize(elemsize)); // xx - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // VVVV - assert(isVectorRegister(id->idReg3())); // nnnnn - if (id->idIns() == INS_sve_sel) - { - assert(isVectorRegister(id->idReg4())); // mmmmm - } - break; - - // Scalable from general scalar (possibly SP) - case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegisterOrZR(id->idReg3())); // mmmmm - assert(isValidScalarDatasize(elemsize)); - break; - - // Scalable, .H, .S or .D - case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long - case IF_SVE_HQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE floating-point round to integral value - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - // Scalable, possibly fixed to .S - case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) - elemsize = id->idOpSize(); - switch (id->idIns()) - { - case INS_sve_sqabs: - case INS_sve_sqneg: - assert(insOptsScalableStandard(id->idInsOpt())); - break; - - default: - assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); - break; - } - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - assert(isVectorRegister(id->idReg1())); // nnnn - assert(isVectorRegister(id->idReg2())); // ddddd - assert(isEvenRegister(id->idReg2())); - assert(isScalableVectorSize(id->idOpSize())); - break; +//------------------------------------------------------------------------ +// emitVectorRegName: Returns a SIMD vector register name. +// +// Arguments: +// reg - A SIMD and floating-point register. +// +// Return value: +// A string that represents a SIMD vector register name. +// +const char* emitter::emitVectorRegName(regNumber reg) +{ + assert((reg >= REG_V0) && (reg <= REG_V31)); - case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) - assert(id->idOpSize() == EA_8BYTE); + int index = (int)reg - (int)REG_V0; - FALLTHROUGH; - case IF_SVE_DO_2A: // ........xx...... 
.....X.MMMMddddd -- SVE saturating inc/dec register by predicate count - case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - assert(isGeneralRegister(id->idReg1())); // ddddd - assert(isPredicateRegister(id->idReg2())); // MMMM - assert(isValidGeneralDatasize(id->idOpSize())); - break; + return vRegNames[index]; +} - case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count - case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx - assert(isPredicateRegister(id->idReg1())); // MMMM - assert(isVectorRegister(id->idReg2())); // ddddd - assert(isScalableVectorSize(id->idOpSize())); - break; +/***************************************************************************** + * + * Returns the base encoding of the given CPU instruction. + */ - case IF_SVE_DQ_0A: // ................ ................ 
-- SVE FFR initialise - break; +emitter::insFormat emitter::emitInsFormat(instruction ins) +{ + // clang-format off + const static insFormat insFormats[] = + { + #define INST1(id, nm, info, fmt, e1 ) fmt, + #define INST2(id, nm, info, fmt, e1, e2 ) fmt, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) fmt, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) fmt, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) fmt, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) fmt, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt, + #include "instrs.h" + #define INST1(id, nm, info, fmt, e1 ) fmt, + #define INST2(id, nm, info, fmt, e1, e2 ) fmt, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) fmt, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) fmt, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) fmt, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) fmt, + #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7 ) fmt, + #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) fmt, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) fmt, + #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11 ) fmt, + #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) fmt, + #include "instrsarm64sve.h" + }; + // clang-format on - case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(id->idReg1())); // NNNN - break; + assert(ins < ArrLen(insFormats)); + assert((insFormats[ins] != IF_NONE)); - case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... 
-- SVE conditionally terminate scalars - assert(insOptsNone(id->idInsOpt())); - assert(isGeneralRegister(id->idReg1())); // nnnnn - assert(isGeneralRegister(id->idReg2())); // mmmmm - assert(isValidGeneralDatasize(id->idOpSize())); // x - break; + return insFormats[ins]; +} - case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow - assert(insOptsNone(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnn - assert(isEvenRegister(id->idReg2())); - break; +#define LD 1 +#define ST 2 +#define CMP 4 +#define RSH 8 +#define WID 16 +#define LNG 32 +#define NRW 64 +#define WR2 128 // writes operand 2 instead of 1 - case IF_SVE_HG_2A: // ................ ......nnnn.ddddd -- SVE2 FP8 downconverts - assert(insOptsNone(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnn - assert(isEvenRegister(id->idReg2())); - break; +// clang-format off +/*static*/ const BYTE CodeGenInterface::instInfo[] = +{ + #define INST1(id, nm, info, fmt, e1 ) info, + #define INST2(id, nm, info, fmt, e1, e2 ) info, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) info, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) info, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) info, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) info, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) info, + #include "instrs.h" + #define INST1(id, nm, info, fmt, e1 ) info, + #define INST2(id, nm, info, fmt, e1, e2 ) info, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) info, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) info, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) info, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) info, + #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7 ) info, + #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) info, + #define INST9(id, nm, 
info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) info, + #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11 ) info, + #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) info, + #include "instrsarm64sve.h" +}; +// clang-format on - case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // nnnnn - assert(isVectorRegister(id->idReg2())); // ddddd - assert(optGetSveElemsize(id->idInsOpt()) != EA_8BYTE); - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - // x - break; +//------------------------------------------------------------------------ +// emitInsIsCompare: Returns true if the instruction is some kind of compare or test instruction. +// +bool emitter::emitInsIsCompare(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & CMP) != 0; + else + return false; +} - case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment - assert(insOptsNone(id->idInsOpt())); - assert(id->idOpSize() == EA_8BYTE); - assert(isGeneralRegisterOrZR(id->idReg1())); // ddddd - assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn - assert(isValidSimm<6>(emitGetInsSC(id))); // iiiiii - break; +//------------------------------------------------------------------------ +// emitInsIsLoad: Returns true if the instruction is some kind of load instruction. +// +bool emitter::emitInsIsLoad(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LD) != 0; + else + return false; +} - case IF_SVE_BC_1A: // ................ 
.....iiiiiiddddd -- SVE stack frame size - assert(insOptsNone(id->idInsOpt())); - assert(id->idOpSize() == EA_8BYTE); - assert(isGeneralRegister(id->idReg1())); // ddddd - assert(isValidSimm<6>(emitGetInsSC(id))); // iiiiii - break; +//------------------------------------------------------------------------ +// emitInsIsStore: Returns true if the instruction is some kind of store instruction. +// +bool emitter::emitInsIsStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & ST) != 0; + else + return false; +} - case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm - { - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx xx - const ssize_t imm = emitGetInsSC(id); +//------------------------------------------------------------------------ +// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load or store instruction. +// +bool emitter::emitInsIsLoadOrStore(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; + else + return false; +} - switch (id->idInsOpt()) - { - case INS_OPTS_SCALABLE_B: - assert(isValidUimmFrom1<3>(imm)); // iii - break; +//------------------------------------------------------------------------ +// emitInsIsVectorRightShift: Returns true if the instruction is ASIMD right shift. +// +bool emitter::emitInsIsVectorRightShift(instruction ins) +{ + // We have pseudo ins like lea which are not included in emitInsLdStTab. 
+ if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & RSH) != 0; + else + return false; +} - case INS_OPTS_SCALABLE_H: - assert(isValidUimmFrom1<4>(imm)); // xiii - break; +//------------------------------------------------------------------------ +// emitInsIsVectorLong: Returns true if the instruction has the destination register that is double that of both source +// operands. Indicated by the suffix L. +// +bool emitter::emitInsIsVectorLong(instruction ins) +{ + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & LNG) != 0; + else + return false; +} - case INS_OPTS_SCALABLE_S: - assert(isValidUimmFrom1<5>(imm)); // xxiii - break; +//------------------------------------------------------------------------ +// emitInsIsVectorNarrow: Returns true if the element width of the destination register of the instruction is half that +// of both source operands. Indicated by the suffix N. +// +bool emitter::emitInsIsVectorNarrow(instruction ins) +{ + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & NRW) != 0; + else + return false; +} - case INS_OPTS_SCALABLE_D: - assert(isValidUimmFrom1<6>(imm)); // xx xiii - break; +//------------------------------------------------------------------------ +// emitInsIsVectorWide: Returns true if the element width of the destination register and the first source operand of +// the instruction is double that of the second source operand. Indicated by the suffix W. 
+// +bool emitter::emitInsIsVectorWide(instruction ins) +{ + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & WID) != 0; + else + return false; +} - default: - unreached(); - break; - } - break; - } +//------------------------------------------------------------------------ +// emitInsDestIsOp2: Returns true if the instruction is one of the special +// cases that has its destination register as the second register operand +// instead of the first. +// +bool emitter::emitInsDestIsOp2(instruction ins) +{ + if (ins < ArrLen(CodeGenInterface::instInfo)) + return (CodeGenInterface::instInfo[ins] & WR2) != 0; + else + return false; +} - case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate - // increment) - { - ssize_t imm1; - ssize_t imm2; - insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm<5>(imm1)); // iiiii - assert(isValidSimm<5>(imm2)); // iiiii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; - } +#undef LD +#undef ST +#undef CMP +#undef RHS +#undef WID +#undef LNG +#undef NRW +#undef WR2 - case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register - // increment) - case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate - // increment) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm<5>(emitGetInsSC(id))); // iiiii - assert(isIntegerRegister(id->idReg2())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; +/***************************************************************************** + * + * Returns the specific encoding of the given CPU instruction and format + */ - case IF_SVE_FR_2A: // 
.........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long - { - assert(insOptsScalableWide(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx - const ssize_t imm = emitGetInsSC(id); - - switch (id->idInsOpt()) - { - case INS_OPTS_SCALABLE_B: - assert(isValidUimm<3>(imm)); // iii - break; +emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) +{ + // clang-format off + const static code_t insCodes1[] = + { + #define INST1(id, nm, info, fmt, e1 ) e1, + #define INST2(id, nm, info, fmt, e1, e2 ) e1, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) e1, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e1, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e1, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e1, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1, + #include "instrs.h" + }; + const static code_t insCodes2[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) e2, + #define INST3(id, nm, info, fmt, e1, e2, e3 ) e2, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e2, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e2, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e2, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2, + #include "instrs.h" + }; + const static code_t insCodes3[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) e3, + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e3, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e3, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e3, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3, + #include "instrs.h" + }; + const static code_t insCodes4[] = + { + #define INST1(id, nm, info, fmt, 
e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e4, + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e4, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e4, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4, + #include "instrs.h" + }; + const static code_t insCodes5[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e5, + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e5, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5, + #include "instrs.h" + }; + const static code_t insCodes6[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e6, + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6, + #include "instrs.h" + }; + const static code_t insCodes7[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7, + #include "instrs.h" + }; + const static code_t insCodes8[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) + 
#define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8, + #include "instrs.h" + }; + const static code_t insCodes9[] = + { + #define INST1(id, nm, info, fmt, e1 ) + #define INST2(id, nm, info, fmt, e1, e2 ) + #define INST3(id, nm, info, fmt, e1, e2, e3 ) + #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) + #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) + #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) + #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9, + #include "instrs.h" + }; + // clang-format on - case INS_OPTS_SCALABLE_H: - assert(isValidUimm<4>(imm)); // x iii - break; + const static insFormat formatEncode9[9] = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C, + IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F}; + const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E}; + const static insFormat formatEncode6B[6] = {IF_LS_2D, IF_LS_3F, IF_LS_2E, IF_LS_2F, IF_LS_3G, IF_LS_2G}; + const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A}; + const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B}; + const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B}; + const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A}; + const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A}; + const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A}; + const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI}; + const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C}; + const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B}; + const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L}; + const static insFormat formatEncode4H[4] = 
{IF_DV_3E, IF_DV_3A, IF_DV_2L, IF_DV_2M}; + const static insFormat formatEncode4I[4] = {IF_DV_3D, IF_DV_3B, IF_DV_2G, IF_DV_2A}; + const static insFormat formatEncode4J[4] = {IF_DV_2N, IF_DV_2O, IF_DV_3E, IF_DV_3A}; + const static insFormat formatEncode4K[4] = {IF_DV_3E, IF_DV_3A, IF_DV_3EI, IF_DV_3AI}; + const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C}; + const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C}; + const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C}; + const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E}; + const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI}; + const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H}; + const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I}; + const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI}; + const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M}; + const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E}; + const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F}; + const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B}; + const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D}; + const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B}; + const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C}; + const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F}; + const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D}; + const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F}; + const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C}; + const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G}; + const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L}; + const static insFormat formatEncode2L[2] = {IF_DR_2G, IF_DV_2M}; + const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI}; + const static insFormat formatEncode2N[2] = 
{IF_DV_2N, IF_DV_2O}; + const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A}; + const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B}; + const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A}; - case INS_OPTS_SCALABLE_S: - assert(isValidUimm<5>(imm)); // xx iii - break; + code_t code = BAD_CODE; + insFormat insFmt = emitInsFormat(ins); + bool encoding_found = false; + int index = -1; - default: - unreached(); + switch (insFmt) + { + case IF_EN9: + for (index = 0; index < 9; index++) + { + if (fmt == formatEncode9[index]) + { + encoding_found = true; break; + } } break; - } - - case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow - { - assert(insOptsScalableWide(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx - const ssize_t imm = emitGetInsSC(id); - switch (id->idInsOpt()) + case IF_EN6A: + for (index = 0; index < 6; index++) { - case INS_OPTS_SCALABLE_B: - assert(isValidUimmFrom1<3>(imm)); // iii + if (fmt == formatEncode6A[index]) + { + encoding_found = true; break; + } + } + break; - case INS_OPTS_SCALABLE_H: - assert(isValidUimmFrom1<4>(imm)); // x iii + case IF_EN6B: + for (index = 0; index < 6; index++) + { + if (fmt == formatEncode6B[index]) + { + encoding_found = true; break; + } + } + break; - case INS_OPTS_SCALABLE_S: - assert(isValidUimmFrom1<5>(imm)); // xx iii + case IF_EN5A: + for (index = 0; index < 5; index++) + { + if (fmt == formatEncode5A[index]) + { + encoding_found = true; break; + } + } + break; - default: - unreached(); + case IF_EN5B: + for (index = 0; index < 5; index++) + { + if (fmt == formatEncode5B[index]) + { + encoding_found = true; break; + } } break; - } - case IF_SVE_FV_2A: // ........xx...... 
.....rmmmmmddddd -- SVE2 complex integer add - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(emitIsValidEncodedRotationImm90_or_270(emitGetInsSC(id))); // r - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN5C: + for (index = 0; index < 5; index++) + { + if (fmt == formatEncode5C[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry - assert(insOptsScalableWords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x + case IF_EN4A: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4A[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // mmmmm - if (id->idInsOpt() == INS_OPTS_SCALABLE_S) + case IF_EN4B: + for (index = 0; index < 4; index++) { - assert(id->idIns() == INS_sve_sm4e); + if (fmt == formatEncode4B[index]) + { + encoding_found = true; + break; + } } - else + break; + + case IF_EN4C: + for (index = 0; index < 4; index++) { - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + if (fmt == formatEncode4C[index]) + { + encoding_found = true; + break; + } } - assert(isScalableVectorSize(elemsize)); break; - case IF_SVE_GL_1A: // ................ 
...........ddddd -- SVE2 crypto unary operations - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isScalableVectorSize(elemsize)); + case IF_EN4D: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4D[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare - assert(id->idOpSize() == EA_8BYTE); - - FALLTHROUGH; - case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isGeneralRegister(id->idReg2())); // nnnnn - assert(isValidGeneralDatasize(id->idOpSize())); // X - assert(isGeneralRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; - - case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element - { - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isPredicateRegister(id->idReg2())); // NNNN - assert(isPredicateRegister(id->idReg3())); // MMMM - assert(isGeneralRegister(id->idReg4())); // vv - assert((REG_R12 <= id->idReg4()) && (id->idReg4() <= REG_R15)); - const ssize_t imm = emitGetInsSC(id); - - switch (id->idInsOpt()) - { - case INS_OPTS_SCALABLE_B: - assert(isValidUimm<4>(imm)); + case IF_EN4E: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4E[index]) + { + encoding_found = true; break; + } + } + break; - case INS_OPTS_SCALABLE_H: - assert(isValidUimm<3>(imm)); + case IF_EN4F: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4F[index]) + { + encoding_found = true; break; + } + } + break; - case INS_OPTS_SCALABLE_S: - assert(isValidUimm<2>(imm)); + case IF_EN4G: + for (index = 0; index < 4; index++) + { + if (fmt 
== formatEncode4G[index]) + { + encoding_found = true; break; + } + } + break; - case INS_OPTS_SCALABLE_D: - assert(isValidUimm<1>(imm)); + case IF_EN4H: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4H[index]) + { + encoding_found = true; break; + } + } + break; - default: - unreached(); + case IF_EN4I: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4I[index]) + { + encoding_found = true; break; + } } - break; - } - - case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - assert(isValidUimm<1>(emitGetInsSC(id))); // i - FALLTHROUGH; - case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isHighPredicateRegister(id->idReg2())); // NNN - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN4J: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4J[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate - // pair) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isLowPredicateRegister(id->idReg1())); // DDD - assert(isGeneralRegister(id->idReg2())); // nnnnn - assert(isGeneralRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN4K: + for (index = 0; index < 4; index++) + { + if (fmt == formatEncode4K[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit - // (predicate-as-counter) - assert(insOptsScalableStandard(id->idInsOpt())); // L - assert(isHighPredicateRegister(id->idReg1())); // DDD - assert(isGeneralRegister(id->idReg2())); // nnnnn - assert(isGeneralRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3A: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3A[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isHighPredicateRegister(id->idReg1())); // DDD - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3B: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3B[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EA_1A: // ........xx...... 
...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidUimm<8>(emitGetInsSC(id))); // iiiiiiii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3C: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3C[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) - imm = emitGetInsSC(id); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - assert(isValidSimm<8>(imm)); // iiiiiiii + case IF_EN3D: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3D[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) - imm = emitGetInsSC(id); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - assert(isValidUimm<8>(imm)); // iiiiiiii + case IF_EN3E: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3E[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3F: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3F[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_ED_1A: // ........xx...... 
...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm<8>(emitGetInsSC(id)) || isValidUimm<8>(emitGetInsSC(id))); // iiiiiiii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3G: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3G[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidSimm<8>(emitGetInsSC(id))); // iiiiiiii - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + case IF_EN3H: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3H[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product - assert(insOptsScalableWords(id->idInsOpt())); - - FALLTHROUGH; - case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(emitIsValidEncodedRotationImm0_to_270(emitGetInsSC(id))); // rr - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx - break; - - case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) - case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // 
ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmm - assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); - assert(isValidUimm<4>(emitGetInsSC(id))); // ii rr - break; - - case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) - case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isLowVectorRegister(id->idReg3())); // mmm - assert(isValidUimm<3>(emitGetInsSC(id))); // i rr - break; - - case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) - case IF_SVE_IL_3A_A: // ............iiii 
...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus - // immediate) - case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus - // immediate) - case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // immediate) - case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) - case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus - // immediate) - case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // immediate) - case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) - elemsize = id->idOpSize(); - assert(insOptsScalable(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isScalableVectorSize(elemsize)); + case IF_EN3I: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3I[index]) + { + encoding_found = true; + break; + } + } + break; - switch (id->idIns()) + case IF_EN3J: + for (index = 0; index < 3; index++) { - case INS_sve_ld2b: - case INS_sve_ld2h: - case INS_sve_ld2w: - case INS_sve_ld2d: - case INS_sve_ld2q: - 
case INS_sve_st2b: - case INS_sve_st2h: - case INS_sve_st2w: - case INS_sve_st2d: - case INS_sve_st2q: - assert((isValidSimm_MultipleOf<4, 2>(emitGetInsSC(id)))); // iiii + if (fmt == formatEncode3J[index]) + { + encoding_found = true; break; + } + } + break; - case INS_sve_ld3b: - case INS_sve_ld3h: - case INS_sve_ld3w: - case INS_sve_ld3d: - case INS_sve_ld3q: - case INS_sve_st3b: - case INS_sve_st3h: - case INS_sve_st3w: - case INS_sve_st3d: - case INS_sve_st3q: - assert((isValidSimm_MultipleOf<4, 3>(emitGetInsSC(id)))); // iiii + case IF_EN2A: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2A[index]) + { + encoding_found = true; break; + } + } + break; - case INS_sve_ld4b: - case INS_sve_ld4h: - case INS_sve_ld4w: - case INS_sve_ld4d: - case INS_sve_ld4q: - case INS_sve_st4b: - case INS_sve_st4h: - case INS_sve_st4w: - case INS_sve_st4d: - case INS_sve_st4q: - assert((isValidSimm_MultipleOf<4, 4>(emitGetInsSC(id)))); // iiii + case IF_EN2B: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2B[index]) + { + encoding_found = true; break; + } + } + break; - case INS_sve_ld1rqb: - case INS_sve_ld1rqd: - case INS_sve_ld1rqh: - case INS_sve_ld1rqw: - assert((isValidSimm_MultipleOf<4, 16>(emitGetInsSC(id)))); // iiii + case IF_EN2C: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2C[index]) + { + encoding_found = true; break; + } + } + break; - case INS_sve_ld1rob: - case INS_sve_ld1rod: - case INS_sve_ld1roh: - case INS_sve_ld1row: - assert((isValidSimm_MultipleOf<4, 32>(emitGetInsSC(id)))); // iiii + case IF_EN2D: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2D[index]) + { + encoding_found = true; break; + } + } + break; - default: - assert(isValidSimm<4>(emitGetInsSC(id))); // iiii + case IF_EN2E: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2E[index]) + { + encoding_found = true; break; + } } break; - case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE 
contiguous store (scalar plus scalar) - elemsize = id->idOpSize(); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); // xx - // st1h is reserved for scalable B - assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt()) - : insOptsScalableStandard(id->idInsOpt())); + case IF_EN2F: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2F[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); // x - break; - - case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit - // unscaled offsets) - elemsize = id->idOpSize(); - assert(insOptsScalable32bitExtends(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - 
assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isScalableVectorSize(elemsize)); + case IF_EN2G: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2G[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isScalableVectorSize(elemsize)); // xx - assert(isValidSimm<4>(imm)); // iiii + case IF_EN2H: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2H[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isScalableVectorSize(elemsize)); // x - assert(isValidSimm<4>(imm)); // iiii - break; - - case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled 
offsets) - case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - elemsize = id->idOpSize(); - assert(insOptsScalable32bitExtends(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) - case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) - case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous 
first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2I: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2I[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2J: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2J[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE 
contiguous load (quadwords, scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableWordsOrQuadwords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2K: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2K[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case 
IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2L: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2L[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) - case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2M: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2M[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus - // scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store 
(vector plus - // scalar) - case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus - // scalar) - case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus - // scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2N: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2N[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableDoubleWordsOrQuadword(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2O: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2O[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus - // scalar) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous 
non-temporal load (scalar plus scalar) - case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) - case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) - case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // scalar) - case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isGeneralRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); - break; - - case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isPredicateRegister(id->idReg2())); // ggg - assert(isGeneralRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm - assert(isScalableVectorSize(elemsize)); + case IF_EN2P: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2P[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - 
assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(emitIsValidEncodedRotationImm90_or_270(imm)); - assert(isScalableVectorSize(elemsize)); + case IF_EN2Q: + for (index = 0; index < 2; index++) + { + if (fmt == formatEncode2Q[index]) + { + encoding_found = true; + break; + } + } break; - case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isVectorRegister(id->idReg4())); - assert(emitIsValidEncodedRotationImm0_to_270(imm)); - assert(isScalableVectorSize(elemsize)); + default: + if (fmt == insFmt) + { + encoding_found = true; + index = 0; + } + else + { + encoding_found = false; + } break; + } - case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isPredicateRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isScalableVectorSize(elemsize)); - break; + assert(encoding_found); - case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate - // (predicated) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); - assert(emitIsValidEncodedSmallFloatImm(imm)); + switch (index) + { + case 0: + assert(ins < ArrLen(insCodes1)); + code = insCodes1[ins]; break; - - case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isValidUimm<3>(imm)); - assert(isScalableVectorSize(elemsize)); + case 1: + assert(ins < ArrLen(insCodes2)); + code = insCodes2[ins]; break; - - case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isScalableVectorSize(elemsize)); + case 2: + assert(ins < ArrLen(insCodes3)); + code = insCodes3[ins]; break; - - case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isVectorRegister(id->idReg4())); - assert(isScalableVectorSize(elemsize)); + case 3: + assert(ins < ArrLen(insCodes4)); + code = insCodes4[ins]; break; - - case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing - // multiplicand - elemsize = 
id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isVectorRegister(id->idReg4())); - assert(isScalableVectorSize(elemsize)); + case 4: + assert(ins < ArrLen(insCodes5)); + code = insCodes5[ins]; break; - - case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register - case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register - elemsize = id->idOpSize(); - assert(insOptsNone(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isPredicateRegister(id->idReg1())); // TTTT - assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn - assert(isValidSimm<9>(emitGetInsSC(id))); // iii + case 5: + assert(ins < ArrLen(insCodes6)); + code = insCodes6[ins]; break; - - case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register - case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register - elemsize = id->idOpSize(); - assert(insOptsNone(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); // ttttt - assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn - assert(isValidSimm<9>(emitGetInsSC(id))); // iii - break; - - case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - break; - - case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - 
assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<2>(emitGetInsSC(id))); // ii - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - break; - - case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<3>(emitGetInsSC(id))); // ii - // i - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - break; - - case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - assert(insOptsScalable(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); // ddddd - assert(isVectorRegister(id->idReg2())); // nnnnn - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isValidUimm<1>(emitGetInsSC(id))); // i - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - break; - - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - elemsize = id->idOpSize(); - assert(insOptsScalable32bitExtends(id->idInsOpt())); - assert(isLowPredicateRegister(id->idReg1())); - assert(isGeneralRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isScalableVectorSize(elemsize)); + case 6: + assert(ins < ArrLen(insCodes7)); + code = insCodes7[ins]; break; - - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isLowPredicateRegister(id->idReg1())); - assert(isGeneralRegister(id->idReg2())); - 
assert(isVectorRegister(id->idReg3())); - assert(isScalableVectorSize(elemsize)); + case 7: + assert(ins < ArrLen(insCodes8)); + code = insCodes8[ins]; break; - - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - elemsize = id->idOpSize(); - assert(insOptsNone(id->idInsOpt())); - assert(isLowPredicateRegister(id->idReg1())); - assert(isGeneralRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); - assert(isScalableVectorSize(elemsize)); + case 8: + assert(ins < ArrLen(insCodes9)); + code = insCodes9[ins]; break; + } - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isLowPredicateRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); - break; + assert((code != BAD_CODE)); - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - elemsize = id->idOpSize(); - assert(insOptsNone(id->idInsOpt())); - assert(isLowPredicateRegister(id->idReg1())); - assert(isGeneralRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); - break; + return code; +} - case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert(isValidUimm<5>(emitGetInsSC(id))); - break; +// true if this 'imm' can be encoded as a input operand to a mov instruction +/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size) +{ + // Check for "MOV (wide immediate)". 
+ if (canEncodeHalfwordImm(imm, size)) + return true; - case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - break; + // Next try the ones-complement form of 'halfword immediate' imm(i16,hw), + // namely "MOV (inverted wide immediate)". + ssize_t notOfImm = NOT_helper(imm, getBitWidth(size)); + if (canEncodeHalfwordImm(notOfImm, size)) + return true; - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - break; + // Finally try "MOV (bitmask immediate)" imm(N,r,s) + if (canEncodeBitMaskImm(imm, size)) + return true; - case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - break; + return false; +} - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isVectorRegister(id->idReg3())); - assert((isValidUimm_MultipleOf<5, 8>(emitGetInsSC(id)))); - break; +// true if this 'imm' can be encoded as 
a input operand to a vector movi instruction +/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize) +{ + if (elemsize == EA_8BYTE) + { + UINT64 uimm = imm; + while (uimm != 0) + { + INT64 loByte = uimm & 0xFF; + if ((loByte == 0) || (loByte == 0xFF)) + { + uimm >>= 8; + } + else + { + return false; + } + } + assert(uimm == 0); + return true; + } + else + { + // First try the standard 'byteShifted immediate' imm(i8,bySh) + if (canEncodeByteShiftedImm(imm, elemsize, true)) + return true; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); - break; - - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - elemsize = id->idOpSize(); - assert(insOptsScalableWords(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); - break; - - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - elemsize = id->idOpSize(); - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); - break; + // Next try the ones-complement form of the 'immediate' imm(i8,bySh) + ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize)); + if (canEncodeByteShiftedImm(notOfImm, elemsize, true)) + return true; + } + return false; +} - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - elemsize = id->idOpSize(); - assert(insOptsScalableStandard(id->idInsOpt())); - 
assert(isScalableVectorSize(elemsize)); - assert(isVectorRegister(id->idReg1())); - assert(isLowPredicateRegister(id->idReg2())); - assert(isGeneralRegister(id->idReg3())); - break; +// true if this 'imm' can be encoded as a input operand to a fmov instruction +/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl) +{ + if (canEncodeFloatImm8(immDbl)) + return true; - case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) - assert(insOptsNone(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - break; + return false; +} - case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts - assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - break; +// true if this 'imm' can be encoded as a input operand to an add instruction +/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size) +{ + if (unsigned_abs(imm) <= 0x0fff) + return true; + else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding + return true; - case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isGeneralRegisterOrZR(id->idReg2())); // ZR is SP - break; + return false; +} - case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - break; +// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction +/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size) +{ + return emitIns_valid_imm_for_add(imm, size); +} - case IF_SVE_BJ_2A: // ........xx...... 
......nnnnnddddd -- SVE floating-point exponential accelerator - case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements - case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) - assert(insOptsScalableAtLeastHalf(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - break; +// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction +/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size) +{ + if (canEncodeBitMaskImm(imm, size)) + return true; - case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) - case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert - case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(isValidVectorShiftAmount(imm, optGetSveElemsize(id->idInsOpt()), - emitInsIsVectorRightShift(id->idIns()))); - assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); - break; + return false; +} - case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element - imm = emitGetInsSC(id); - assert(insOptsScalable(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isValidBroadcastImm(imm, optGetSveElemsize(id->idInsOpt()))); - break; +// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction +/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm) +{ + return (imm >= -256) && (imm <= 255); +} - case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - 
assert(insOptsScalableStandard(id->idInsOpt())); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); -#ifdef DEBUG - switch (id->idInsOpt()) - { - case INS_OPTS_SCALABLE_B: - assert(isValidUimm<4>(imm)); - break; +// true if this 'imm' can be encoded as the offset in a ldr/str instruction +/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr) +{ + if (imm == 0) + return true; // Encodable using IF_LS_2A - case INS_OPTS_SCALABLE_H: - assert(isValidUimm<3>(imm)); - break; + if (isValidSimm<9>(imm)) + return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B) - case INS_OPTS_SCALABLE_S: - assert(isValidUimm<2>(imm)); - break; + if (imm < 0) + return false; // not encodable - case INS_OPTS_SCALABLE_D: - assert(isValidUimm<1>(imm)); - break; + emitAttr size = EA_SIZE(attr); + unsigned scale = NaturalScale_helper(size); + ssize_t mask = size - 1; // the mask of low bits that must be zero to encode the immediate - default: - break; - } -#endif // DEBUG - break; + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + return true; // Encodable using IF_LS_2B - case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); - assert(isVectorRegister(id->idReg1())); - assert(isVectorRegister(id->idReg2())); - assert(isScalableVectorSize(elemsize)); - assert(isValidUimm<4>(imm)); - break; + return false; // not encodable +} - default: - printf("unexpected format %s\n", emitIfName(id->idInsFmt())); - assert(!"Unexpected format"); - break; - } +// true if this 'imm' can be encoded as a input operand to a ccmp instruction +/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm) +{ + return ((imm & 0x01f) == imm); } -#endif // DEBUG -bool emitter::emitInsMayWriteToGCReg(instrDesc* id) +// true if 'imm' can be encoded as an offset in a ldp/stp 
instruction +/*static*/ bool emitter::canEncodeLoadOrStorePairOffset(INT64 imm, emitAttr attr) { - instruction ins = id->idIns(); - insFormat fmt = id->idInsFmt(); + assert((attr == EA_4BYTE) || (attr == EA_8BYTE) || (attr == EA_16BYTE)); + const int size = EA_SIZE_IN_BYTES(attr); + return (imm % size == 0) && (imm >= -64 * size) && (imm < 64 * size); +} - switch (fmt) - { +/************************************************************************ + * + * A helper method to return the natural scale for an EA 'size' + */ - // These are the formats with "destination" registers: +/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size) +{ + assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE); + return BitOperations::Log2((unsigned)size); +} - case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) - case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) - case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 +/************************************************************************ + * + * A helper method to perform a Rotate-Right shift operation + * the source is 'value' and it is rotated right by 'sh' bits + * 'value' is considered to be a fixed size 'width' set of bits. 
+ * + * Example + * value is '00001111', sh is 2 and width is 8 + * result is '11000011' + */ - case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) - case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63) - case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) - case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s) +/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width) +{ + assert(width <= 64); + // Check that 'value' fits in 'width' bits + assert((width == 64) || (value < (1ULL << width))); + // We don't support shifts >= width + assert(sh < width); - case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond + UINT64 result; - case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond - case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm - case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) - case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn - case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn + unsigned rsh = sh; + unsigned lsh = width - rsh; - case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm - case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) - case IF_DR_3C: // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) - case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond - case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) + result = (value >> rsh); + result |= (value << lsh); - case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra + if (width < 64) + { + // mask off any extra bits that we got from the left shift + result &= ((1ULL << width) - 1); + } + return result; +} +/************************************************************************ + * + * A helper method to perform a 'NOT' bitwise complement operation. 
+ * 'value' is considered to be a fixed size 'width' set of bits. + * + * Example + * value is '01001011', and width is 8 + * result is '10110100' + */ - case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov - to general) - case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general) - return true; +/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width) +{ + assert(width <= 64); - case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) - case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) - case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) - case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) - case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register) - case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general) - case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) - case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) - case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) - case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) - case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) - Vd both source and - // destination + UINT64 result = ~value; - case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, - // fminp - scalar) - case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) - case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + if (width < 64) + { + // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width. 
+ UINT64 maxVal = 1ULL << width; + UINT64 lowBitsMask = maxVal - 1; + UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit + // (sign bit) must be set. + assert((value < maxVal) || ((value & signBitsMask) == signBitsMask)); - case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector) - case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) - case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - case IF_DV_3F: // DV_3F .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) - case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) - // Tracked GC pointers cannot be placed into the SIMD registers. - return false; + // mask off any extra bits that we got from the complement operation + result &= lowBitsMask; + } - // These are the load/store formats with "target" registers: + return result; +} - case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) - case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn - case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) - case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc - case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn - case IF_LS_2E: // LS_2E .Q.............. 
....ssnnnnnttttt Vt Rn - case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn - case IF_LS_2G: // LS_2G .Q.............. xx.Sssnnnnnttttt Vt[] Rn - case IF_LS_3A: // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} - case IF_LS_3B: // LS_3B X............... .aaaaannnnnttttt Rt Ra Rn - case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh) - case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn - case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm - case IF_LS_3G: // LS_3G .Q.........mmmmm ...Sssnnnnnttttt Vt[] Rn Rm - - // For the Store instructions the "target" register is actually a "source" value - - if (emitInsIsStore(ins)) - { - return false; - } - else - { - assert(emitInsIsLoad(ins)); - return true; - } +/************************************************************************ + * + * A helper method to perform a bit Replicate operation + * the source is 'value' with a fixed size 'width' set of bits. + * value is replicated to fill out 8/16/32/64 bits as determined by 'size'. + * + * Example + * value is '11000011' (0xC3), width is 8 and size is EA_8BYTE + * result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011' + * 0xC3C3C3C3C3C3C3C3 + */ - case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics - // ARMv8.1 Atomics - assert(emitInsIsStore(ins)); - assert(emitInsIsLoad(ins)); - return true; +/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size) +{ + unsigned immWidth = getBitWidth(size); + assert(width <= immWidth); - case IF_SR_1A: // SR_1A ................
...........ttttt Rt (dc zva, mrs) - return ins == INS_mrs_tpid0; + UINT64 result = value; + unsigned filledBits = width; - default: - return false; + while (filledBits < immWidth) + { + value <<= width; + result |= value; + filledBits += width; } + return result; } -bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id) -{ - if (!id->idIsLclVar()) - return false; +/************************************************************************ + * + * Convert an imm(N,r,s) into a 64-bit immediate + * inputs 'bmImm' a bitMaskImm struct + * 'size' specifies the size of the result (8/16/32/64 bits) + */ - instruction ins = id->idIns(); +/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size) +{ + unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding + unsigned R = bmImm.immR; + unsigned S = bmImm.immS; - // This list is related to the list of instructions used to store local vars in emitIns_S_R(). - // We don't accept writing to float local vars. 
+ unsigned elemWidth = 64; // used when N == 1 - switch (ins) + if (N == 0) // find the smaller elemWidth when N == 0 { - case INS_strb: - case INS_strh: - case INS_str: - case INS_stur: - case INS_sturb: - case INS_sturh: - return true; - default: - return false; + // Scan S for the highest bit not set + elemWidth = 32; + for (unsigned bitNum = 5; bitNum > 0; bitNum--) + { + unsigned oneBit = elemWidth; + if ((S & oneBit) == 0) + break; + elemWidth /= 2; + } + } + else + { + assert(size == EA_8BYTE); } + + unsigned maskSR = elemWidth - 1; + + S &= maskSR; + R &= maskSR; + + // encoding for S is one less than the number of consecutive one bits + S++; // Number of consecutive ones to generate in 'welem' + + // At this point: + // + // 'elemWidth' is the number of bits that we will use for the ROR and Replicate operations + // 'S' is the number of consecutive 1 bits for the immediate + // 'R' is the number of bits that we will Rotate Right the immediate + // 'size' selects the final size of the immediate that we return (64 or 32 bits) + + assert(S < elemWidth); // 'elemWidth' consecutive one's is a reserved encoding + + UINT64 welem; + UINT64 wmask; + + welem = (1ULL << S) - 1; + + wmask = ROR_helper(welem, R, elemWidth); + wmask = Replicate_helper(wmask, elemWidth, size); + + return wmask; } -bool emitter::emitInsWritesToLclVarStackLocPair(instrDesc* id) -{ - if (!id->idIsLclVar()) - return false; +/***************************************************************************** + * + * Check if an immediate can use the left shifted by 12 bits encoding + */ - instruction ins = id->idIns(); +/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm) +{ + if (imm < 0) + { + imm = -imm; // convert to unsigned + } - // This list is related to the list of instructions used to store local vars in emitIns_S_S_R_R(). - // We don't accept writing to float local vars. 
+ if (imm < 0) + { + return false; // Must be MIN_INT64 + } - switch (ins) + if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero { - case INS_stnp: - case INS_stp: - return true; - default: - return false; + return false; } + + imm >>= 12; // shift right by 12 bits + + return (imm <= 0x0fff); // Does it fit in 12 bits } -bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id) +/***************************************************************************** + * + * Normalize the 'imm' so that the upper bits, as defined by 'size' are zero + */ + +/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size) { - instruction ins = id->idIns(); + unsigned immWidth = getBitWidth(size); + INT64 result = imm; - switch (ins) + if (immWidth < 64) { - case INS_ldp: - case INS_ldpsw: - case INS_ldnp: - return true; - default: - return false; + // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width. + INT64 maxVal = 1LL << immWidth; + INT64 lowBitsMask = maxVal - 1; + INT64 hiBitsMask = ~lowBitsMask; + INT64 signBitsMask = + hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set. + assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask)); + + // mask off the hiBits + result &= lowBitsMask; } + return result; } -// Takes an instrDesc 'id' and uses the instruction 'ins' to determine the -// size of the target register that is written or read by the instruction. -// Note that even if EA_4BYTE is returned a load instruction will still -// always zero the upper 4 bytes of the target register. -// This method is required so that we can distinguish between loads that are -// sign-extending as they can have two different sizes for their target register. -// Additionally for instructions like 'ldr' and 'str' these can load/store -// either 4 byte or 8 bytes to/from the target register. 
-// By convention the small unsigned load instructions are considered to write -// a 4 byte sized target register, though since these also zero the upper 4 bytes -// they could equally be considered to write the unsigned value to full 8 byte register. -// -emitAttr emitter::emitInsTargetRegSize(instrDesc* id) -{ - instruction ins = id->idIns(); - emitAttr result = EA_UNKNOWN; +/************************************************************************ + * + * returns true if 'imm' of 'size' bits (8/16/32/64) can be encoded + * using the ARM64 'bitmask immediate' form. + * When a non-null value is passed for 'wbBMI' then this method + * writes back the 'N','S' and 'R' values used to encode this immediate + * + */ - // This is used to determine the size of the target registers for a load/store instruction +/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI) +{ + unsigned immWidth = getBitWidth(size); + unsigned maxLen; - switch (ins) + switch (size) { - case INS_ldxrb: - case INS_ldarb: - case INS_ldaprb: - case INS_ldaxrb: - case INS_stxrb: - case INS_stlrb: - case INS_stlxrb: - case INS_ldrb: - case INS_strb: - case INS_ldurb: - case INS_ldapurb: - case INS_sturb: - case INS_stlurb: - result = EA_4BYTE; + case EA_1BYTE: + maxLen = 3; break; - case INS_ldxrh: - case INS_ldarh: - case INS_ldaprh: - case INS_ldaxrh: - case INS_stxrh: - case INS_stlrh: - case INS_stlxrh: - case INS_ldrh: - case INS_strh: - case INS_ldurh: - case INS_sturh: - case INS_ldapurh: - case INS_stlurh: - result = EA_4BYTE; + case EA_2BYTE: + maxLen = 4; break; - case INS_ldrsb: - case INS_ldursb: - case INS_ldrsh: - case INS_ldursh: - if (id->idOpSize() == EA_8BYTE) - result = EA_8BYTE; - else - result = EA_4BYTE; - break; - - case INS_ldrsw: - case INS_ldursw: - case INS_ldpsw: - result = EA_8BYTE; - break; - - case INS_ldp: - case INS_stp: - case INS_ldnp: - case INS_stnp: - result = id->idOpSize(); + case EA_4BYTE: + maxLen = 5; break; - case
INS_ldxr: - case INS_ldar: - case INS_ldapr: - case INS_ldaxr: - case INS_stxr: - case INS_stlr: - case INS_stlxr: - case INS_ldr: - case INS_str: - case INS_ldur: - case INS_stur: - case INS_ldapur: - case INS_stlur: - result = id->idOpSize(); + case EA_8BYTE: + maxLen = 6; break; default: - NO_WAY("unexpected instruction"); + assert(!"Invalid size"); + maxLen = 0; break; } - return result; -} - -// Takes an instrDesc and uses the instruction to determine the 'size' of the -// data that is loaded from memory. -// -emitAttr emitter::emitInsLoadStoreSize(instrDesc* id) -{ - instruction ins = id->idIns(); - emitAttr result = EA_UNKNOWN; - // The 'result' returned is the 'size' of the data that is loaded from memory. + imm = normalizeImm64(imm, size); - switch (ins) + // Starting with len=1, elemWidth is 2 bits + // len=2, elemWidth is 4 bits + // len=3, elemWidth is 8 bits + // len=4, elemWidth is 16 bits + // len=5, elemWidth is 32 bits + // len=6, elemWidth is 64 bits + // + for (unsigned len = 1; (len <= maxLen); len++) { - case INS_ldarb: - case INS_ldaprb: - case INS_stlrb: - case INS_ldrb: - case INS_strb: - case INS_ldurb: - case INS_ldapurb: - case INS_sturb: - case INS_stlurb: - case INS_ldrsb: - case INS_ldursb: - result = EA_1BYTE; - break; + unsigned elemWidth = 1 << len; + UINT64 elemMask = ((UINT64)-1) >> (64 - elemWidth); + UINT64 tempImm = (UINT64)imm; // A working copy of 'imm' that we can mutate + UINT64 elemVal = tempImm & elemMask; // The low 'elemWidth' bits of 'imm' - case INS_ldarh: - case INS_ldaprh: - case INS_stlrh: - case INS_ldrh: - case INS_strh: - case INS_ldurh: - case INS_sturh: - case INS_ldrsh: - case INS_ldursh: - case INS_ldapurh: - case INS_stlurh: - result = EA_2BYTE; - break; + // Check for all 1's or 0's as these can't be encoded + if ((elemVal == 0) || (elemVal == elemMask)) + continue; - case INS_ldrsw: - case INS_ldursw: - case INS_ldpsw: - result = EA_4BYTE; - break; + // 'checkedBits' is the count of bits that are known to 
match 'elemVal' when replicated + unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match - case INS_ldp: - case INS_stp: - case INS_ldnp: - case INS_stnp: - result = id->idOpSize(); - break; + // Now check to see if each of the next bits match... + // + while (checkedBits < immWidth) + { + tempImm >>= elemWidth; - case INS_ldar: - case INS_ldapr: - case INS_stlr: - case INS_ldr: - case INS_str: - case INS_ldur: - case INS_stur: - case INS_ldapur: - case INS_stlur: - result = id->idOpSize(); - break; + UINT64 nextElem = tempImm & elemMask; + if (nextElem != elemVal) + { + // Not matching, exit this loop and checkedBits will not be equal to immWidth + break; + } - default: - NO_WAY("unexpected instruction"); - break; - } - return result; -} + // The 'nextElem' is matching, so increment 'checkedBits' + checkedBits += elemWidth; + } -/*****************************************************************************/ + // Did the full immediate contain bits that can be formed by repeating 'elemVal'? + if (checkedBits == immWidth) + { + // We are not quite done, since the only values that we can encode as a + // 'bitmask immediate' are those that can be formed by starting with a + // bit string of 0*1* that is rotated by some number of bits. + // + // We check to see if 'elemVal' can be formed using these restrictions. 
+ // + // Observation: + // Rotating by one bit any value that passes these restrictions + // can be xor-ed with the original value and will result in a string + // of bits that have exactly two 1 bits: 'elemRorXor' + // Further the distance between the two one bits tells us the value + // of S and the location of the 1 bits tells us the value of R + // + // Some examples: (immWidth is 8) + // + // S=4,R=0 S=5,R=3 S=3,R=6 + // elemVal: 00001111 11100011 00011100 + // elemRor: 10000111 11110001 00001110 + // elemRorXor: 10001000 00010010 00010010 + // compute S 45678--- ---5678- ---3210- + // compute R 01234567 ---34567 ------67 -// clang-format off -static const char * const xRegNames[] = -{ - #define REGDEF(name, rnum, mask, xname, wname) xname, - #include "register.h" -}; + UINT64 elemRor = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit + UINT64 elemRorXor = elemVal ^ elemRor; // Xor elemVal and elemRor -static const char * const wRegNames[] = -{ - #define REGDEF(name, rnum, mask, xname, wname) wname, - #include "register.h" -}; + // If we only have a two-bit change in elemROR then we can form a mask for this value + unsigned bitCount = 0; + UINT64 oneBit = 0x1; + unsigned R = elemWidth; // R is shift count for ROR (rotate right shift) + unsigned S = 0; // S is number of consecutive one bits + int incr = -1; -static const char * const vRegNames[] = -{ - "v0", "v1", "v2", "v3", "v4", - "v5", "v6", "v7", "v8", "v9", - "v10", "v11", "v12", "v13", "v14", - "v15", "v16", "v17", "v18", "v19", - "v20", "v21", "v22", "v23", "v24", - "v25", "v26", "v27", "v28", "v29", - "v30", "v31" -}; + // Loop over the 'elemWidth' bits in 'elemRorXor' + // + for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++) + { + if (incr == -1) + { + R--; // We decrement R by one whenever incr is -1 + } + if (bitCount == 1) + { + S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor' + } -static const char * const qRegNames[] = -{ - "q0", "q1",
"q2", "q3", "q4", - "q5", "q6", "q7", "q8", "q9", - "q10", "q11", "q12", "q13", "q14", - "q15", "q16", "q17", "q18", "q19", - "q20", "q21", "q22", "q23", "q24", - "q25", "q26", "q27", "q28", "q29", - "q30", "q31" -}; + // Is this bit position a 1 bit in 'elemRorXor'? + // + if (oneBit & elemRorXor) + { + bitCount++; + // Is this the first 1 bit that we found in 'elemRorXor'? + if (bitCount == 1) + { + // Does this 1 bit represent a transition to zero bits? + bool toZeros = ((oneBit & elemVal) != 0); + if (toZeros) + { + // S :: Count down from elemWidth + S = elemWidth; + incr = -1; + } + else // this 1 bit represent a transition to one bits. + { + // S :: Count up from zero + S = 0; + incr = +1; + } + } + else // bitCount > 1 + { + // We found the second (or third...) 1 bit in 'elemRorXor' + incr = 0; // stop decrementing 'R' -static const char * const hRegNames[] = -{ - "h0", "h1", "h2", "h3", "h4", - "h5", "h6", "h7", "h8", "h9", - "h10", "h11", "h12", "h13", "h14", - "h15", "h16", "h17", "h18", "h19", - "h20", "h21", "h22", "h23", "h24", - "h25", "h26", "h27", "h28", "h29", - "h30", "h31" -}; -static const char * const bRegNames[] = -{ - "b0", "b1", "b2", "b3", "b4", - "b5", "b6", "b7", "b8", "b9", - "b10", "b11", "b12", "b13", "b14", - "b15", "b16", "b17", "b18", "b19", - "b20", "b21", "b22", "b23", "b24", - "b25", "b26", "b27", "b28", "b29", - "b30", "b31" -}; + if (bitCount > 2) + { + // More than 2 transitions from 0/1 in 'elemVal' + // This means that 'elemVal' can't be encoded + // using a 'bitmask immediate'. + // + // Furthermore, it will continue to fail + // with any larger 'len' that we try. + // so just return false. + // + return false; + } + } + } -// clang-format on + // shift oneBit left by one bit to test the next position + oneBit <<= 1; + } -//------------------------------------------------------------------------ -// emitRegName: Returns a general-purpose register name or SIMD and floating-point scalar register name. 
-// -// Arguments: -// reg - A general-purpose register or SIMD and floating-point register. -// size - A register size. -// varName - unused parameter. -// -// Return value: -// A string that represents a general-purpose register name or SIMD and floating-point scalar register name. -// -const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName) const -{ - assert(reg < REG_COUNT); + // We expect that bitCount will always be two at this point + // but just in case return false for any bad cases. + // + assert(bitCount == 2); + if (bitCount != 2) + return false; - const char* rn = nullptr; + // Perform some sanity checks on the values of 'S' and 'R' + assert(S > 0); + assert(S < elemWidth); + assert(R < elemWidth); - if (size == EA_8BYTE) - { - rn = xRegNames[reg]; - } - else if (size == EA_4BYTE) - { - rn = wRegNames[reg]; - } - else if (isVectorRegister(reg)) - { - if (size == EA_16BYTE) - { - rn = qRegNames[reg - REG_V0]; - } - else if (size == EA_2BYTE) - { - rn = hRegNames[reg - REG_V0]; - } - else if (size == EA_1BYTE) - { - rn = bRegNames[reg - REG_V0]; - } - else if (size == EA_SCALABLE) - { - rn = emitSveRegName(reg); - } - } + // Does the caller want us to return the N,R,S encoding values? + // + if (wbBMI != nullptr) + { - assert(rn != nullptr); + // The encoding used for S is one less than the + // number of consecutive one bits + S--; - return rn; + if (len == 6) + { + wbBMI->immN = 1; + } + else + { + wbBMI->immN = 0; + // The encoding used for 'S' here is a bit peculiar. + // + // The upper bits need to be complemented, followed by a zero bit + // then the value of 'S-1' + // + unsigned upperBitsOfS = 64 - (1 << (len + 1)); + S |= upperBitsOfS; + } + wbBMI->immR = R; + wbBMI->immS = S; + + // Verify that what we are returning is correct. + assert(imm == emitDecodeBitMaskImm(*wbBMI, size)); + } + // Tell the caller that we can successfully encode this immediate + // using a 'bitmask immediate'. 
+ // + return true; + } + } + return false; } -//------------------------------------------------------------------------ -// emitVectorRegName: Returns a SIMD vector register name. -// -// Arguments: -// reg - A SIMD and floating-point register. -// -// Return value: -// A string that represents a SIMD vector register name. -// -const char* emitter::emitVectorRegName(regNumber reg) +/************************************************************************ + * + * Convert an imm(i16,hw) into a 32/64-bit immediate + * inputs 'hwImm' a halfwordImm struct + * 'size' specifies the size of the result (64 or 32 bits) + */ + +/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size) { - assert((reg >= REG_V0) && (reg <= REG_V31)); + assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms - int index = (int)reg - (int)REG_V0; + unsigned hw = hwImm.immHW; + INT64 val = (INT64)hwImm.immVal; - return vRegNames[index]; + assert((hw <= 1) || (size == EA_8BYTE)); + + INT64 result = val << (16 * hw); + return result; } -/***************************************************************************** +/************************************************************************ + * + * returns true if 'imm' of 'size' bits (32/64) can be encoded + * using the ARM64 'halfword immediate' form. + * When a non-null value is passed for 'wbHWI' then this method + * writes back the 'immHW' and 'immVal' values use to encode this immediate * - * Returns the base encoding of the given CPU instruction. */ -emitter::insFormat emitter::emitInsFormat(instruction ins) +/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI) { - // clang-format off - const static insFormat insFormats[] = + assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms + + unsigned immWidth = (size == EA_8BYTE) ? 64 : 32; + unsigned maxHW = (size == EA_8BYTE) ? 
4 : 2; + + // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF + const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth); + const INT64 mask16 = (INT64)0xFFFF; + + imm = normalizeImm64(imm, size); + + // Try each of the valid hw shift sizes + for (unsigned hw = 0; (hw < maxHW); hw++) { - #define INST1(id, nm, info, fmt, e1 ) fmt, - #define INST2(id, nm, info, fmt, e1, e2 ) fmt, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) fmt, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) fmt, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) fmt, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) fmt, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt, - #include "instrs.h" - #define INST1(id, nm, info, fmt, e1 ) fmt, - #define INST2(id, nm, info, fmt, e1, e2 ) fmt, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) fmt, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) fmt, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) fmt, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) fmt, - #define INST7(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7 ) fmt, - #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) fmt, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) fmt, - #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11 ) fmt, - #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) fmt, - #include "instrsarm64sve.h" - }; - // clang-format on + INT64 curMask = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword + INT64 checkBits = immMask & ~curMask; - assert(ins < ArrLen(insFormats)); - assert((insFormats[ins] != IF_NONE)); + // Excluding the current halfword (using ~curMask) + // does the immediate have zero bits in every other bit that we care about? 
+ // note we care about all 64-bits for EA_8BYTE + // and we care about the lowest 32 bits for EA_4BYTE + // + if ((imm & checkBits) == 0) + { + // Does the caller want us to return the imm(i16,hw) encoding values? + // + if (wbHWI != nullptr) + { + INT64 val = ((imm & curMask) >> (hw * 16)) & mask16; + wbHWI->immHW = hw; + wbHWI->immVal = val; - return insFormats[ins]; + // Verify that what we are returning is correct. + assert(imm == emitDecodeHalfwordImm(*wbHWI, size)); + } + // Tell the caller that we can successfully encode this immediate + // using a 'halfword immediate'. + // + return true; + } + } + return false; } -#define LD 1 -#define ST 2 -#define CMP 4 -#define RSH 8 -#define WID 16 -#define LNG 32 -#define NRW 64 -#define WR2 128 // writes operand 2 instead of 1 +/************************************************************************ + * + * Convert an imm(i8,sh) into a 16/32-bit immediate + * inputs 'bsImm' a byteShiftedImm struct + * 'size' specifies the size of the result (16 or 32 bits) + */ -// clang-format off -/*static*/ const BYTE CodeGenInterface::instInfo[] = +/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size) { - #define INST1(id, nm, info, fmt, e1 ) info, - #define INST2(id, nm, info, fmt, e1, e2 ) info, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) info, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) info, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) info, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) info, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) info, - #include "instrs.h" - #define INST1(id, nm, info, fmt, e1 ) info, - #define INST2(id, nm, info, fmt, e1, e2 ) info, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) info, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) info, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) info, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) info, - #define INST7(id, 
nm, info, fmt, e1, e2, e3, e4, e5, e6, e7 ) info, - #define INST8(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) info, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) info, - #define INST11(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10,e11 ) info, - #define INST13(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13) info, - #include "instrsarm64sve.h" -}; -// clang-format on + bool onesShift = (bsImm.immOnes == 1); + unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3 + UINT32 result = (UINT32)bsImm.immVal; // 8-bit immediate -//------------------------------------------------------------------------ -// emitInsIsCompare: Returns true if the instruction is some kind of compare or test instruction. -// -bool emitter::emitInsIsCompare(instruction ins) -{ - // We have pseudo ins like lea which are not included in emitInsLdStTab. - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & CMP) != 0; - else - return false; -} + if (bySh > 0) + { + assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms + if (size == EA_2BYTE) + { + assert(bySh < 2); + } + else + { + assert(bySh < 4); + } -//------------------------------------------------------------------------ -// emitInsIsLoad: Returns true if the instruction is some kind of load instruction. -// -bool emitter::emitInsIsLoad(instruction ins) -{ - // We have pseudo ins like lea which are not included in emitInsLdStTab. - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & LD) != 0; - else - return false; -} + result <<= (8 * bySh); -//------------------------------------------------------------------------ -// emitInsIsStore: Returns true if the instruction is some kind of store instruction. -// -bool emitter::emitInsIsStore(instruction ins) -{ - // We have pseudo ins like lea which are not included in emitInsLdStTab. 
- if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & ST) != 0; - else - return false; + if (onesShift) + { + result |= ((1 << (8 * bySh)) - 1); + } + } + return result; } -//------------------------------------------------------------------------ -// emitInsIsLoadOrStore: Returns true if the instruction is some kind of load or store instruction. -// -bool emitter::emitInsIsLoadOrStore(instruction ins) -{ - // We have pseudo ins like lea which are not included in emitInsLdStTab. - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & (LD | ST)) != 0; - else - return false; -} +/************************************************************************ + * + * returns true if 'imm' of 'size' bits (16/32) can be encoded + * using the ARM64 'byteShifted immediate' form. + * When a non-null value is passed for 'wbBSI' then this method + * writes back the 'immBY' and 'immVal' values use to encode this immediate + * + */ -//------------------------------------------------------------------------ -// emitInsIsVectorRightShift: Returns true if the instruction is ASIMD right shift. -// -bool emitter::emitInsIsVectorRightShift(instruction ins) +/*static*/ bool emitter::canEncodeByteShiftedImm(INT64 imm, + emitAttr size, + bool allow_MSL, + emitter::byteShiftedImm* wbBSI) { - // We have pseudo ins like lea which are not included in emitInsLdStTab. - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & RSH) != 0; - else - return false; -} + bool canEncode = false; + bool onesShift = false; // true if we use the shifting ones variant + unsigned bySh = 0; // number of bytes to shift: 0, 1, 2, 3 + unsigned imm8 = 0; // immediate to use in the encoding -//------------------------------------------------------------------------ -// emitInsIsVectorLong: Returns true if the instruction has the destination register that is double that of both source -// operands. 
Indicated by the suffix L. -// -bool emitter::emitInsIsVectorLong(instruction ins) -{ - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & LNG) != 0; - else - return false; -} + imm = normalizeImm64(imm, size); -//------------------------------------------------------------------------ -// emitInsIsVectorNarrow: Returns true if the element width of the destination register of the instruction is half that -// of both source operands. Indicated by the suffix N. -// -bool emitter::emitInsIsVectorNarrow(instruction ins) -{ - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & NRW) != 0; + if (size == EA_1BYTE) + { + imm8 = (unsigned)imm; + assert(imm8 < 0x100); + canEncode = true; + } + else if (size == EA_8BYTE) + { + imm8 = (unsigned)imm; + assert(imm8 < 0x100); + canEncode = true; + } else - return false; -} + { + assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms -//------------------------------------------------------------------------ -// emitInsIsVectorWide: Returns true if the element width of the destination register and the first source operand of -// the instruction is double that of the second source operand. Indicated by the suffix W. -// -bool emitter::emitInsIsVectorWide(instruction ins) -{ - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & WID) != 0; - else - return false; + unsigned immWidth = (size == EA_4BYTE) ? 32 : 16; + unsigned maxBY = (size == EA_4BYTE) ? 
4 : 2; + + // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF + const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth); + const INT32 mask8 = (INT32)0xFF; + + // Try each of the valid by shift sizes + for (bySh = 0; (bySh < maxBY); bySh++) + { + INT32 curMask = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted + INT32 checkBits = immMask & ~curMask; + INT32 immCheck = (imm & checkBits); + + // Excluding the current byte (using ~curMask) + // does the immediate have zero bits in every other bit that we care about? + // or can be use the shifted one variant? + // note we care about all 32-bits for EA_4BYTE + // and we care about the lowest 16 bits for EA_2BYTE + // + if (immCheck == 0) + { + canEncode = true; + } + + // MSL is only supported for 32-bit. + if (allow_MSL && (size == EA_4BYTE)) + { + if ((bySh == 1) && (immCheck == 0xFF)) + { + canEncode = true; + onesShift = true; + } + else if ((bySh == 2) && (immCheck == 0xFFFF)) + { + canEncode = true; + onesShift = true; + } + } + if (canEncode) + { + imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8); + break; + } + } + } + + if (canEncode) + { + // Does the caller want us to return the imm(i8,bySh) encoding values? + // + if (wbBSI != nullptr) + { + wbBSI->immOnes = onesShift; + wbBSI->immBY = bySh; + wbBSI->immVal = imm8; + + // Verify that what we are returning is correct. + assert(imm == emitDecodeByteShiftedImm(*wbBSI, size)); + } + // Tell the caller that we can successfully encode this immediate + // using a 'byteShifted immediate'. + // + return true; + } + return false; } -//------------------------------------------------------------------------ -// emitInsDestIsOp2: Returns true if the instruction is one of the special -// cases that has its destination register as the second register operand -// instead of the first. 
-// -bool emitter::emitInsDestIsOp2(instruction ins) +/************************************************************************ + * + * Convert a 'float 8-bit immediate' into a double. + * inputs 'fpImm' a floatImm8 struct + */ + +/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm) { - if (ins < ArrLen(CodeGenInterface::instInfo)) - return (CodeGenInterface::instInfo[ins] & WR2) != 0; - else - return false; -} + unsigned sign = fpImm.immSign; + unsigned exp = fpImm.immExp ^ 0x4; + unsigned mant = fpImm.immMant + 16; + unsigned scale = 16 * 8; -#undef LD -#undef ST -#undef CMP -#undef RHS -#undef WID -#undef LNG -#undef NRW -#undef WR2 + while (exp > 0) + { + scale /= 2; + exp--; + } -/***************************************************************************** + double result = ((double)mant) / ((double)scale); + if (sign == 1) + { + result = -result; + } + + return result; +} + +/************************************************************************ + * + * returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form. 
+ * also returns the encoding if wbFPI is non-null * - * Returns the specific encoding of the given CPU instruction and format */ -emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) +/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI) { - // clang-format off - const static code_t insCodes1[] = + bool canEncode = false; + double val = immDbl; + + int sign = 0; + if (val < 0.0) { - #define INST1(id, nm, info, fmt, e1 ) e1, - #define INST2(id, nm, info, fmt, e1, e2 ) e1, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) e1, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e1, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e1, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e1, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1, - #include "instrs.h" - }; - const static code_t insCodes2[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) e2, - #define INST3(id, nm, info, fmt, e1, e2, e3 ) e2, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e2, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e2, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e2, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2, - #include "instrs.h" - }; - const static code_t insCodes3[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) e3, - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) e3, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e3, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e3, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3, - #include "instrs.h" - }; - const static code_t insCodes4[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, 
e4 ) e4, - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e4, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e4, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4, - #include "instrs.h" - }; - const static code_t insCodes5[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) e5, - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e5, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5, - #include "instrs.h" - }; - const static code_t insCodes6[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) e6, - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6, - #include "instrs.h" - }; - const static code_t insCodes7[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7, - #include "instrs.h" - }; - const static code_t insCodes8[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8, - #include 
"instrs.h" - }; - const static code_t insCodes9[] = - { - #define INST1(id, nm, info, fmt, e1 ) - #define INST2(id, nm, info, fmt, e1, e2 ) - #define INST3(id, nm, info, fmt, e1, e2, e3 ) - #define INST4(id, nm, info, fmt, e1, e2, e3, e4 ) - #define INST5(id, nm, info, fmt, e1, e2, e3, e4, e5 ) - #define INST6(id, nm, info, fmt, e1, e2, e3, e4, e5, e6 ) - #define INST9(id, nm, info, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9, - #include "instrs.h" - }; - // clang-format on - - const static insFormat formatEncode9[9] = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C, - IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F}; - const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E}; - const static insFormat formatEncode6B[6] = {IF_LS_2D, IF_LS_3F, IF_LS_2E, IF_LS_2F, IF_LS_3G, IF_LS_2G}; - const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A}; - const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B}; - const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B}; - const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A}; - const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A}; - const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A}; - const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI}; - const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C}; - const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B}; - const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L}; - const static insFormat formatEncode4H[4] = {IF_DV_3E, IF_DV_3A, IF_DV_2L, IF_DV_2M}; - const static insFormat formatEncode4I[4] = {IF_DV_3D, IF_DV_3B, IF_DV_2G, IF_DV_2A}; - const static insFormat formatEncode4J[4] = {IF_DV_2N, IF_DV_2O, IF_DV_3E, IF_DV_3A}; - 
const static insFormat formatEncode4K[4] = {IF_DV_3E, IF_DV_3A, IF_DV_3EI, IF_DV_3AI}; - const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C}; - const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C}; - const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C}; - const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E}; - const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI}; - const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H}; - const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I}; - const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI}; - const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M}; - const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E}; - const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F}; - const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B}; - const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D}; - const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B}; - const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C}; - const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F}; - const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D}; - const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F}; - const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C}; - const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G}; - const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L}; - const static insFormat formatEncode2L[2] = {IF_DR_2G, IF_DV_2M}; - const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI}; - const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O}; - const static insFormat formatEncode2O[2] = {IF_DV_3E, IF_DV_3A}; - const static insFormat formatEncode2P[2] = {IF_DV_2Q, IF_DV_3B}; - const static insFormat formatEncode2Q[2] = {IF_DV_2S, IF_DV_3A}; 
- - code_t code = BAD_CODE; - insFormat insFmt = emitInsFormat(ins); - bool encoding_found = false; - int index = -1; - - switch (insFmt) - { - case IF_EN9: - for (index = 0; index < 9; index++) - { - if (fmt == formatEncode9[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN6A: - for (index = 0; index < 6; index++) - { - if (fmt == formatEncode6A[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN6B: - for (index = 0; index < 6; index++) - { - if (fmt == formatEncode6B[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN5A: - for (index = 0; index < 5; index++) - { - if (fmt == formatEncode5A[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN5B: - for (index = 0; index < 5; index++) - { - if (fmt == formatEncode5B[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN5C: - for (index = 0; index < 5; index++) - { - if (fmt == formatEncode5C[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4A: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4A[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4B: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4B[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4C: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4C[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4D: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4D[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4E: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4E[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4F: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4F[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4G: - for (index = 0; index < 4; 
index++) - { - if (fmt == formatEncode4G[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4H: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4H[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4I: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4I[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4J: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4J[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN4K: - for (index = 0; index < 4; index++) - { - if (fmt == formatEncode4K[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3A: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3A[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3B: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3B[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3C: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3C[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3D: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3D[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3E: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3E[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3F: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3F[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3G: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3G[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3H: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3H[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3I: - for (index = 0; index < 3; index++) - { - if (fmt == 
formatEncode3I[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN3J: - for (index = 0; index < 3; index++) - { - if (fmt == formatEncode3J[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2A: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2A[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2B: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2B[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2C: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2C[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2D: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2D[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2E: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2E[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2F: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2F[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2G: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2G[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2H: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2H[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2I: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2I[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2J: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2J[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2K: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2K[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2L: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2L[index]) - { - 
encoding_found = true; - break; - } - } - break; - - case IF_EN2M: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2M[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2N: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2N[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2O: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2O[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2P: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2P[index]) - { - encoding_found = true; - break; - } - } - break; - - case IF_EN2Q: - for (index = 0; index < 2; index++) - { - if (fmt == formatEncode2Q[index]) - { - encoding_found = true; - break; - } - } - break; - - default: - if (fmt == insFmt) - { - encoding_found = true; - index = 0; - } - else - { - encoding_found = false; - } - break; - } - - assert(encoding_found); - - switch (index) - { - case 0: - assert(ins < ArrLen(insCodes1)); - code = insCodes1[ins]; - break; - case 1: - assert(ins < ArrLen(insCodes2)); - code = insCodes2[ins]; - break; - case 2: - assert(ins < ArrLen(insCodes3)); - code = insCodes3[ins]; - break; - case 3: - assert(ins < ArrLen(insCodes4)); - code = insCodes4[ins]; - break; - case 4: - assert(ins < ArrLen(insCodes5)); - code = insCodes5[ins]; - break; - case 5: - assert(ins < ArrLen(insCodes6)); - code = insCodes6[ins]; - break; - case 6: - assert(ins < ArrLen(insCodes7)); - code = insCodes7[ins]; - break; - case 7: - assert(ins < ArrLen(insCodes8)); - code = insCodes8[ins]; - break; - case 8: - assert(ins < ArrLen(insCodes9)); - code = insCodes9[ins]; - break; - } - - assert((code != BAD_CODE)); - - return code; -} - -// true if this 'imm' can be encoded as a input operand to a mov instruction -/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size) -{ - // Check for "MOV (wide immediate)". 
- if (canEncodeHalfwordImm(imm, size)) - return true; - - // Next try the ones-complement form of 'halfword immediate' imm(i16,hw), - // namely "MOV (inverted wide immediate)". - ssize_t notOfImm = NOT_helper(imm, getBitWidth(size)); - if (canEncodeHalfwordImm(notOfImm, size)) - return true; - - // Finally try "MOV (bitmask immediate)" imm(N,r,s) - if (canEncodeBitMaskImm(imm, size)) - return true; - - return false; -} - -// true if this 'imm' can be encoded as a input operand to a vector movi instruction -/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize) -{ - if (elemsize == EA_8BYTE) - { - UINT64 uimm = imm; - while (uimm != 0) - { - INT64 loByte = uimm & 0xFF; - if ((loByte == 0) || (loByte == 0xFF)) - { - uimm >>= 8; - } - else - { - return false; - } - } - assert(uimm == 0); - return true; - } - else - { - // First try the standard 'byteShifted immediate' imm(i8,bySh) - if (canEncodeByteShiftedImm(imm, elemsize, true)) - return true; - - // Next try the ones-complement form of the 'immediate' imm(i8,bySh) - ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize)); - if (canEncodeByteShiftedImm(notOfImm, elemsize, true)) - return true; - } - return false; -} - -// true if this 'imm' can be encoded as a input operand to a fmov instruction -/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl) -{ - if (canEncodeFloatImm8(immDbl)) - return true; - - return false; -} - -// true if this 'imm' can be encoded as a input operand to an add instruction -/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size) -{ - if (unsigned_abs(imm) <= 0x0fff) - return true; - else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding - return true; - - return false; -} - -// true if this 'imm' can be encoded as a input operand to an non-add/sub alu instruction -/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size) -{ - return emitIns_valid_imm_for_add(imm, size); -} - -// true if 
this 'imm' can be encoded as a input operand to an non-add/sub alu instruction -/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size) -{ - if (canEncodeBitMaskImm(imm, size)) - return true; - - return false; -} - -// true if this 'imm' can be encoded as the offset in an unscaled ldr/str instruction -/*static*/ bool emitter::emitIns_valid_imm_for_unscaled_ldst_offset(INT64 imm) -{ - return (imm >= -256) && (imm <= 255); -} - -// true if this 'imm' can be encoded as the offset in a ldr/str instruction -/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr) -{ - if (imm == 0) - return true; // Encodable using IF_LS_2A - - if (isValidSimm<9>(imm)) - return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B) - - if (imm < 0) - return false; // not encodable - - emitAttr size = EA_SIZE(attr); - unsigned scale = NaturalScale_helper(size); - ssize_t mask = size - 1; // the mask of low bits that must be zero to encode the immediate - - if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) - return true; // Encodable using IF_LS_2B - - return false; // not encodable -} - -// true if this 'imm' can be encoded as a input operand to a ccmp instruction -/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm) -{ - return ((imm & 0x01f) == imm); -} - -// true if 'imm' can be encoded as an offset in a ldp/stp instruction -/*static*/ bool emitter::canEncodeLoadOrStorePairOffset(INT64 imm, emitAttr attr) -{ - assert((attr == EA_4BYTE) || (attr == EA_8BYTE) || (attr == EA_16BYTE)); - const int size = EA_SIZE_IN_BYTES(attr); - return (imm % size == 0) && (imm >= -64 * size) && (imm < 64 * size); -} - -/************************************************************************ - * - * A helper method to return the natural scale for an EA 'size' - */ - -/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size) -{ - assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE); 
- return BitOperations::Log2((unsigned)size); -} - -/************************************************************************ - * - * A helper method to perform a Rotate-Right shift operation - * the source is 'value' and it is rotated right by 'sh' bits - * 'value' is considered to be a fixed size 'width' set of bits. - * - * Example - * value is '00001111', sh is 2 and width is 8 - * result is '11000011' - */ - -/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width) -{ - assert(width <= 64); - // Check that 'value' fits in 'width' bits - assert((width == 64) || (value < (1ULL << width))); - // We don't support shifts >= width - assert(sh < width); - - UINT64 result; - - unsigned rsh = sh; - unsigned lsh = width - rsh; - - result = (value >> rsh); - result |= (value << lsh); - - if (width < 64) - { - // mask off any extra bits that we got from the left shift - result &= ((1ULL << width) - 1); - } - return result; -} -/************************************************************************ - * - * A helper method to perform a 'NOT' bitwise complement operation. - * 'value' is considered to be a fixed size 'width' set of bits. - * - * Example - * value is '01001011', and width is 8 - * result is '10110100' - */ - -/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width) -{ - assert(width <= 64); - - UINT64 result = ~value; - - if (width < 64) - { - // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width. - UINT64 maxVal = 1ULL << width; - UINT64 lowBitsMask = maxVal - 1; - UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit - // (sign bit) must be set. 
- assert((value < maxVal) || ((value & signBitsMask) == signBitsMask)); - - // mask off any extra bits that we got from the complement operation - result &= lowBitsMask; - } - - return result; -} - -/************************************************************************ - * - * A helper method to perform a bit Replicate operation - * the source is 'value' with a fixed size 'width' set of bits. - * value is replicated to fill out 8/16/32/64 bits as determined by 'size'. - * - * Example - * value is '11000011' (0xE3), width is 8 and size is EA_8BYTE - * result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011' - * 0xE3E3E3E3E3E3E3E3 - */ - -/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size) -{ - unsigned immWidth = getBitWidth(size); - assert(width <= immWidth); - - UINT64 result = value; - unsigned filledBits = width; - - while (filledBits < immWidth) - { - value <<= width; - result |= value; - filledBits += width; - } - return result; -} - -/************************************************************************ - * - * Convert an imm(N,r,s) into a 64-bit immediate - * inputs 'bmImm' a bitMaskImm struct - * 'size' specifies the size of the result (8/16/32/64 bits) - */ - -/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size) -{ - unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding - unsigned R = bmImm.immR; - unsigned S = bmImm.immS; - - unsigned elemWidth = 64; // used when N == 1 - - if (N == 0) // find the smaller elemWidth when N == 0 - { - // Scan S for the highest bit not set - elemWidth = 32; - for (unsigned bitNum = 5; bitNum > 0; bitNum--) - { - unsigned oneBit = elemWidth; - if ((S & oneBit) == 0) - break; - elemWidth /= 2; - } - } - else - { - assert(size == EA_8BYTE); - } - - unsigned maskSR = elemWidth - 1; - - S &= maskSR; - R &= maskSR; - - // encoding for S is one less than the number of consecutive one bits - 
S++; // Number of consecutive ones to generate in 'welem' - - // At this point: - // - // 'elemWidth' is the number of bits that we will use for the ROR and Replicate operations - // 'S' is the number of consecutive 1 bits for the immediate - // 'R' is the number of bits that we will Rotate Right the immediate - // 'size' selects the final size of the immediate that we return (64 or 32 bits) - - assert(S < elemWidth); // 'elemWidth' consecutive one's is a reserved encoding - - UINT64 welem; - UINT64 wmask; - - welem = (1ULL << S) - 1; - - wmask = ROR_helper(welem, R, elemWidth); - wmask = Replicate_helper(wmask, elemWidth, size); - - return wmask; -} - -/***************************************************************************** - * - * Check if an immediate can use the left shifted by 12 bits encoding - */ - -/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm) -{ - if (imm < 0) - { - imm = -imm; // convert to unsigned - } - - if (imm < 0) - { - return false; // Must be MIN_INT64 - } - - if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero - { - return false; - } - - imm >>= 12; // shift right by 12 bits - - return (imm <= 0x0fff); // Does it fit in 12 bits -} - -/***************************************************************************** - * - * Normalize the 'imm' so that the upper bits, as defined by 'size' are zero - */ - -/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size) -{ - unsigned immWidth = getBitWidth(size); - INT64 result = imm; - - if (immWidth < 64) - { - // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width. - INT64 maxVal = 1LL << immWidth; - INT64 lowBitsMask = maxVal - 1; - INT64 hiBitsMask = ~lowBitsMask; - INT64 signBitsMask = - hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set. 
- assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask)); - - // mask off the hiBits - result &= lowBitsMask; - } - return result; -} - -/************************************************************************ - * - * returns true if 'imm' of 'size bits (8/16/32/64) can be encoded - * using the ARM64 'bitmask immediate' form. - * When a non-null value is passed for 'wbBMI' then this method - * writes back the 'N','S' and 'R' values use to encode this immediate - * - */ - -/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI) -{ - unsigned immWidth = getBitWidth(size); - unsigned maxLen; - - switch (size) - { - case EA_1BYTE: - maxLen = 3; - break; - - case EA_2BYTE: - maxLen = 4; - break; - - case EA_4BYTE: - maxLen = 5; - break; - - case EA_8BYTE: - maxLen = 6; - break; - - default: - assert(!"Invalid size"); - maxLen = 0; - break; - } - - imm = normalizeImm64(imm, size); - - // Starting with len=1, elemWidth is 2 bits - // len=2, elemWidth is 4 bits - // len=3, elemWidth is 8 bits - // len=4, elemWidth is 16 bits - // len=5, elemWidth is 32 bits - // len=6, elemWidth is 64 bits - // - for (unsigned len = 1; (len <= maxLen); len++) - { - unsigned elemWidth = 1 << len; - UINT64 elemMask = ((UINT64)-1) >> (64 - elemWidth); - UINT64 tempImm = (UINT64)imm; // A working copy of 'imm' that we can mutate - UINT64 elemVal = tempImm & elemMask; // The low 'elemWidth' bits of 'imm' - - // Check for all 1's or 0's as these can't be encoded - if ((elemVal == 0) || (elemVal == elemMask)) - continue; - - // 'checkedBits' is the count of bits that are known to match 'elemVal' when replicated - unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match - - // Now check to see if each of the next bits match... 
- // - while (checkedBits < immWidth) - { - tempImm >>= elemWidth; - - UINT64 nextElem = tempImm & elemMask; - if (nextElem != elemVal) - { - // Not matching, exit this loop and checkedBits will not be equal to immWidth - break; - } - - // The 'nextElem' is matching, so increment 'checkedBits' - checkedBits += elemWidth; - } - - // Did the full immediate contain bits that can be formed by repeating 'elemVal'? - if (checkedBits == immWidth) - { - // We are not quite done, since the only values that we can encode as a - // 'bitmask immediate' are those that can be formed by starting with a - // bit string of 0*1* that is rotated by some number of bits. - // - // We check to see if 'elemVal' can be formed using these restrictions. - // - // Observation: - // Rotating by one bit any value that passes these restrictions - // can be xor-ed with the original value and will result it a string - // of bits that have exactly two 1 bits: 'elemRorXor' - // Further the distance between the two one bits tells us the value - // of S and the location of the 1 bits tells us the value of R - // - // Some examples: (immWidth is 8) - // - // S=4,R=0 S=5,R=3 S=3,R=6 - // elemVal: 00001111 11100011 00011100 - // elemRor: 10000111 11110001 00001110 - // elemRorXor: 10001000 00010010 00010010 - // compute S 45678--- ---5678- ---3210- - // compute R 01234567 ---34567 ------67 - - UINT64 elemRor = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit - UINT64 elemRorXor = elemVal ^ elemRor; // Xor elemVal and elemRor - - // If we only have a two-bit change in elemROR then we can form a mask for this value - unsigned bitCount = 0; - UINT64 oneBit = 0x1; - unsigned R = elemWidth; // R is shift count for ROR (rotate right shift) - unsigned S = 0; // S is number of consecutive one bits - int incr = -1; - - // Loop over the 'elemWidth' bits in 'elemRorXor' - // - for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++) - { - if (incr == -1) - { - R--; // We decrement R by one 
whenever incr is -1 - } - if (bitCount == 1) - { - S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor' - } - - // Is this bit position a 1 bit in 'elemRorXor'? - // - if (oneBit & elemRorXor) - { - bitCount++; - // Is this the first 1 bit that we found in 'elemRorXor'? - if (bitCount == 1) - { - // Does this 1 bit represent a transition to zero bits? - bool toZeros = ((oneBit & elemVal) != 0); - if (toZeros) - { - // S :: Count down from elemWidth - S = elemWidth; - incr = -1; - } - else // this 1 bit represent a transition to one bits. - { - // S :: Count up from zero - S = 0; - incr = +1; - } - } - else // bitCount > 1 - { - // We found the second (or third...) 1 bit in 'elemRorXor' - incr = 0; // stop decrementing 'R' - - if (bitCount > 2) - { - // More than 2 transitions from 0/1 in 'elemVal' - // This means that 'elemVal' can't be encoded - // using a 'bitmask immediate'. - // - // Furthermore, it will continue to fail - // with any larger 'len' that we try. - // so just return false. - // - return false; - } - } - } - - // shift oneBit left by one bit to test the next position - oneBit <<= 1; - } - - // We expect that bitCount will always be two at this point - // but just in case return false for any bad cases. - // - assert(bitCount == 2); - if (bitCount != 2) - return false; - - // Perform some sanity checks on the values of 'S' and 'R' - assert(S > 0); - assert(S < elemWidth); - assert(R < elemWidth); - - // Does the caller want us to return the N,R,S encoding values? - // - if (wbBMI != nullptr) - { - - // The encoding used for S is one less than the - // number of consecutive one bits - S--; - - if (len == 6) - { - wbBMI->immN = 1; - } - else - { - wbBMI->immN = 0; - // The encoding used for 'S' here is a bit peculiar. 
- // - // The upper bits need to be complemented, followed by a zero bit - // then the value of 'S-1' - // - unsigned upperBitsOfS = 64 - (1 << (len + 1)); - S |= upperBitsOfS; - } - wbBMI->immR = R; - wbBMI->immS = S; - - // Verify that what we are returning is correct. - assert(imm == emitDecodeBitMaskImm(*wbBMI, size)); - } - // Tell the caller that we can successfully encode this immediate - // using a 'bitmask immediate'. - // - return true; - } - } - return false; -} - -/************************************************************************ - * - * Convert an imm(i16,hw) into a 32/64-bit immediate - * inputs 'hwImm' a halfwordImm struct - * 'size' specifies the size of the result (64 or 32 bits) - */ - -/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size) -{ - assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms - - unsigned hw = hwImm.immHW; - INT64 val = (INT64)hwImm.immVal; - - assert((hw <= 1) || (size == EA_8BYTE)); - - INT64 result = val << (16 * hw); - return result; -} - -/************************************************************************ - * - * returns true if 'imm' of 'size' bits (32/64) can be encoded - * using the ARM64 'halfword immediate' form. - * When a non-null value is passed for 'wbHWI' then this method - * writes back the 'immHW' and 'immVal' values use to encode this immediate - * - */ - -/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI) -{ - assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms - - unsigned immWidth = (size == EA_8BYTE) ? 64 : 32; - unsigned maxHW = (size == EA_8BYTE) ? 
4 : 2; - - // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF - const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth); - const INT64 mask16 = (INT64)0xFFFF; - - imm = normalizeImm64(imm, size); - - // Try each of the valid hw shift sizes - for (unsigned hw = 0; (hw < maxHW); hw++) - { - INT64 curMask = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword - INT64 checkBits = immMask & ~curMask; - - // Excluding the current halfword (using ~curMask) - // does the immediate have zero bits in every other bit that we care about? - // note we care about all 64-bits for EA_8BYTE - // and we care about the lowest 32 bits for EA_4BYTE - // - if ((imm & checkBits) == 0) - { - // Does the caller want us to return the imm(i16,hw) encoding values? - // - if (wbHWI != nullptr) - { - INT64 val = ((imm & curMask) >> (hw * 16)) & mask16; - wbHWI->immHW = hw; - wbHWI->immVal = val; - - // Verify that what we are returning is correct. - assert(imm == emitDecodeHalfwordImm(*wbHWI, size)); - } - // Tell the caller that we can successfully encode this immediate - // using a 'halfword immediate'. 
- // - return true; - } - } - return false; -} - -/************************************************************************ - * - * Convert an imm(i8,sh) into a 16/32-bit immediate - * inputs 'bsImm' a byteShiftedImm struct - * 'size' specifies the size of the result (16 or 32 bits) - */ - -/*static*/ UINT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size) -{ - bool onesShift = (bsImm.immOnes == 1); - unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3 - UINT32 result = (UINT32)bsImm.immVal; // 8-bit immediate - - if (bySh > 0) - { - assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms - if (size == EA_2BYTE) - { - assert(bySh < 2); - } - else - { - assert(bySh < 4); - } - - result <<= (8 * bySh); - - if (onesShift) - { - result |= ((1 << (8 * bySh)) - 1); - } - } - return result; -} - -/************************************************************************ - * - * returns true if 'imm' of 'size' bits (16/32) can be encoded - * using the ARM64 'byteShifted immediate' form. - * When a non-null value is passed for 'wbBSI' then this method - * writes back the 'immBY' and 'immVal' values use to encode this immediate - * - */ - -/*static*/ bool emitter::canEncodeByteShiftedImm(INT64 imm, - emitAttr size, - bool allow_MSL, - emitter::byteShiftedImm* wbBSI) -{ - bool canEncode = false; - bool onesShift = false; // true if we use the shifting ones variant - unsigned bySh = 0; // number of bytes to shift: 0, 1, 2, 3 - unsigned imm8 = 0; // immediate to use in the encoding - - imm = normalizeImm64(imm, size); - - if (size == EA_1BYTE) - { - imm8 = (unsigned)imm; - assert(imm8 < 0x100); - canEncode = true; - } - else if (size == EA_8BYTE) - { - imm8 = (unsigned)imm; - assert(imm8 < 0x100); - canEncode = true; - } - else - { - assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms - - unsigned immWidth = (size == EA_4BYTE) ? 
32 : 16; - unsigned maxBY = (size == EA_4BYTE) ? 4 : 2; - - // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF - const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth); - const INT32 mask8 = (INT32)0xFF; - - // Try each of the valid by shift sizes - for (bySh = 0; (bySh < maxBY); bySh++) - { - INT32 curMask = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted - INT32 checkBits = immMask & ~curMask; - INT32 immCheck = (imm & checkBits); - - // Excluding the current byte (using ~curMask) - // does the immediate have zero bits in every other bit that we care about? - // or can be use the shifted one variant? - // note we care about all 32-bits for EA_4BYTE - // and we care about the lowest 16 bits for EA_2BYTE - // - if (immCheck == 0) - { - canEncode = true; - } - - // MSL is only supported for 32-bit. - if (allow_MSL && (size == EA_4BYTE)) - { - if ((bySh == 1) && (immCheck == 0xFF)) - { - canEncode = true; - onesShift = true; - } - else if ((bySh == 2) && (immCheck == 0xFFFF)) - { - canEncode = true; - onesShift = true; - } - } - if (canEncode) - { - imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8); - break; - } - } - } - - if (canEncode) - { - // Does the caller want us to return the imm(i8,bySh) encoding values? - // - if (wbBSI != nullptr) - { - wbBSI->immOnes = onesShift; - wbBSI->immBY = bySh; - wbBSI->immVal = imm8; - - // Verify that what we are returning is correct. - assert(imm == emitDecodeByteShiftedImm(*wbBSI, size)); - } - // Tell the caller that we can successfully encode this immediate - // using a 'byteShifted immediate'. - // - return true; - } - return false; -} - -/************************************************************************ - * - * Convert a 'float 8-bit immediate' into a double. 
- * inputs 'fpImm' a floatImm8 struct - */ - -/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm) -{ - unsigned sign = fpImm.immSign; - unsigned exp = fpImm.immExp ^ 0x4; - unsigned mant = fpImm.immMant + 16; - unsigned scale = 16 * 8; - - while (exp > 0) - { - scale /= 2; - exp--; - } - - double result = ((double)mant) / ((double)scale); - if (sign == 1) - { - result = -result; - } - - return result; -} - -/************************************************************************ - * - * returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form. - * also returns the encoding if wbFPI is non-null - * - */ - -/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI) -{ - bool canEncode = false; - double val = immDbl; - - int sign = 0; - if (val < 0.0) - { - val = -val; - sign = 1; - } - - int exp = 0; - while ((val < 1.0) && (exp >= -4)) - { - val *= 2.0; - exp--; - } - while ((val >= 2.0) && (exp <= 5)) - { - val *= 0.5; - exp++; - } - exp += 3; - val *= 16.0; - int ival = (int)val; - - if ((exp >= 0) && (exp <= 7)) - { - if (val == (double)ival) - { - canEncode = true; - - if (wbFPI != nullptr) - { - ival -= 16; - assert((ival >= 0) && (ival <= 15)); - - wbFPI->immSign = sign; - wbFPI->immExp = exp ^ 0x4; - wbFPI->immMant = ival; - unsigned imm8 = wbFPI->immFPIVal; - assert((imm8 >= 0) && (imm8 <= 0xff)); - } - } - } - - return canEncode; -} - -/***************************************************************************** - * - * For the given 'ins' returns the reverse instruction - * if one exists, otherwise returns INS_INVALID - */ - -/*static*/ instruction emitter::insReverse(instruction ins) -{ - switch (ins) - { - case INS_add: - return INS_sub; - case INS_adds: - return INS_subs; - - case INS_sub: - return INS_add; - case INS_subs: - return INS_adds; - - case INS_cmp: - return INS_cmn; - case INS_cmn: - return INS_cmp; - - case INS_ccmp: - return INS_ccmn; - case INS_ccmn: - 
return INS_ccmp; - - default: - return INS_invalid; - } -} - -/***************************************************************************** - * - * For the given 'datasize' and 'elemsize', make the proper arrangement option - * returns the insOpts that specifies the vector register arrangement - * if one does not exist returns INS_OPTS_NONE - */ - -/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize) -{ - insOpts result = INS_OPTS_NONE; - - if (datasize == EA_8BYTE) - { - switch (elemsize) - { - case EA_1BYTE: - result = INS_OPTS_8B; - break; - case EA_2BYTE: - result = INS_OPTS_4H; - break; - case EA_4BYTE: - result = INS_OPTS_2S; - break; - case EA_8BYTE: - result = INS_OPTS_1D; - break; - default: - unreached(); - break; - } - } - else if (datasize == EA_16BYTE) - { - switch (elemsize) - { - case EA_1BYTE: - result = INS_OPTS_16B; - break; - case EA_2BYTE: - result = INS_OPTS_8H; - break; - case EA_4BYTE: - result = INS_OPTS_4S; - break; - case EA_8BYTE: - result = INS_OPTS_2D; - break; - default: - unreached(); - break; - } - } - return result; -} - -/***************************************************************************** - * - * For the given 'datasize' and arrangement 'opts' - * returns true is the pair specifies a valid arrangement - */ -/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt) -{ - if (datasize == EA_8BYTE) - { - if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D)) - { - return true; - } - } - else if (datasize == EA_16BYTE) - { - if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D)) - { - return true; - } - } - return false; -} - -//------------------------------------------------------------------------ -// insGetRegisterListSize: Returns a size of the register list a given instruction operates on. -// -// Arguments: -// ins - An instruction which uses a register list -// (e.g. 
ld1 (2 registers), ld1r, st1, tbl, tbx). -// -// Return value: -// A number of consecutive SIMD and floating-point registers the instruction loads to/store from. -// -/*static*/ unsigned emitter::insGetRegisterListSize(instruction ins) -{ - unsigned registerListSize = 0; - - switch (ins) - { - case INS_ld1: - case INS_ld1r: - case INS_st1: - case INS_tbl: - case INS_tbx: - registerListSize = 1; - break; - - case INS_ld1_2regs: - case INS_ld2: - case INS_ld2r: - case INS_st1_2regs: - case INS_st2: - case INS_tbl_2regs: - case INS_tbx_2regs: - registerListSize = 2; - break; - - case INS_ld1_3regs: - case INS_ld3: - case INS_ld3r: - case INS_st1_3regs: - case INS_st3: - case INS_tbl_3regs: - case INS_tbx_3regs: - registerListSize = 3; - break; - - case INS_ld1_4regs: - case INS_ld4: - case INS_ld4r: - case INS_st1_4regs: - case INS_st4: - case INS_tbl_4regs: - case INS_tbx_4regs: - registerListSize = 4; - break; - - default: - assert(!"Unexpected instruction"); - break; - } - - return registerListSize; -} - -// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement -// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed -// -/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement) -{ - if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) || - (arrangement == INS_OPTS_1D)) - { - return EA_8BYTE; - } - else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) || - (arrangement == INS_OPTS_2D)) - { - return EA_16BYTE; - } - else - { - assert(!" 
invalid 'arrangement' value"); - return EA_UNKNOWN; - } -} - -// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement -// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed -// -/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement) -{ - if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B)) - { - return EA_1BYTE; - } - else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H)) - { - return EA_2BYTE; - } - else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S)) - { - return EA_4BYTE; - } - else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D)) - { - return EA_8BYTE; - } - else - { - assert(!" invalid 'arrangement' value"); - return EA_UNKNOWN; - } -} - -/*static*/ insOpts emitter::optWidenElemsizeArrangement(insOpts arrangement) -{ - if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B)) - { - return INS_OPTS_8H; - } - else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H)) - { - return INS_OPTS_4S; - } - else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S)) - { - return INS_OPTS_2D; - } - else - { - assert(!" invalid 'arrangement' value"); - return INS_OPTS_NONE; - } -} - -/*static*/ emitAttr emitter::widenDatasize(emitAttr datasize) -{ - if (datasize == EA_1BYTE) - { - return EA_2BYTE; - } - else if (datasize == EA_2BYTE) - { - return EA_4BYTE; - } - else if (datasize == EA_4BYTE) - { - return EA_8BYTE; - } - else - { - assert(!" 
invalid 'datasize' value"); - return EA_UNKNOWN; - } -} - -// For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register -// arrangement -// asserts and returns INS_OPTS_NONE if an invalid 'srcArrangement' value is passed -// -/*static*/ insOpts emitter::optWidenDstArrangement(insOpts srcArrangement) -{ - insOpts dstArrangement = INS_OPTS_NONE; - - switch (srcArrangement) - { - case INS_OPTS_8B: - dstArrangement = INS_OPTS_4H; - break; - - case INS_OPTS_16B: - dstArrangement = INS_OPTS_8H; - break; - - case INS_OPTS_4H: - dstArrangement = INS_OPTS_2S; - break; - - case INS_OPTS_8H: - dstArrangement = INS_OPTS_4S; - break; - - case INS_OPTS_2S: - dstArrangement = INS_OPTS_1D; - break; - - case INS_OPTS_4S: - dstArrangement = INS_OPTS_2D; - break; - - default: - assert(!" invalid 'srcArrangement' value"); - break; - } - - return dstArrangement; -} - -// For the given 'conversion' returns the 'dstsize' specified by the conversion option -/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion) -{ - switch (conversion) - { - case INS_OPTS_S_TO_8BYTE: - case INS_OPTS_D_TO_8BYTE: - case INS_OPTS_4BYTE_TO_D: - case INS_OPTS_8BYTE_TO_D: - case INS_OPTS_S_TO_D: - case INS_OPTS_H_TO_D: - - return EA_8BYTE; - - case INS_OPTS_S_TO_4BYTE: - case INS_OPTS_D_TO_4BYTE: - case INS_OPTS_4BYTE_TO_S: - case INS_OPTS_8BYTE_TO_S: - case INS_OPTS_D_TO_S: - case INS_OPTS_H_TO_S: - - return EA_4BYTE; - - case INS_OPTS_S_TO_H: - case INS_OPTS_D_TO_H: - - return EA_2BYTE; - - default: - assert(!" 
invalid 'conversion' value"); - return EA_UNKNOWN; - } -} - -// For the given 'conversion' returns the 'srcsize' specified by the conversion option -/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion) -{ - switch (conversion) - { - case INS_OPTS_D_TO_8BYTE: - case INS_OPTS_D_TO_4BYTE: - case INS_OPTS_8BYTE_TO_D: - case INS_OPTS_8BYTE_TO_S: - case INS_OPTS_D_TO_S: - case INS_OPTS_D_TO_H: - - return EA_8BYTE; - - case INS_OPTS_S_TO_8BYTE: - case INS_OPTS_S_TO_4BYTE: - case INS_OPTS_4BYTE_TO_S: - case INS_OPTS_4BYTE_TO_D: - case INS_OPTS_S_TO_D: - case INS_OPTS_S_TO_H: - - return EA_4BYTE; - - case INS_OPTS_H_TO_S: - case INS_OPTS_H_TO_D: - - return EA_2BYTE; - - default: - assert(!" invalid 'conversion' value"); - return EA_UNKNOWN; - } -} - -// For the given 'size' and 'index' returns true if it specifies a valid index for a vector register of 'size' -/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index) -{ - assert(isValidVectorDatasize(datasize)); - assert(isValidVectorElemsize(elemsize)); - - bool result = false; - if (index >= 0) - { - if (datasize == EA_8BYTE) - { - switch (elemsize) - { - case EA_1BYTE: - result = (index < 8); - break; - case EA_2BYTE: - result = (index < 4); - break; - case EA_4BYTE: - result = (index < 2); - break; - case EA_8BYTE: - result = (index < 1); - break; - default: - unreached(); - break; - } - } - else if (datasize == EA_16BYTE) - { - switch (elemsize) - { - case EA_1BYTE: - result = (index < 16); - break; - case EA_2BYTE: - result = (index < 8); - break; - case EA_4BYTE: - result = (index < 4); - break; - case EA_8BYTE: - result = (index < 2); - break; - default: - unreached(); - break; - } - } - } - return result; -} - -/***************************************************************************** - * - * Add an instruction with no operands. 
- */ - -void emitter::emitIns(instruction ins) -{ - instrDesc* id = emitNewInstrSmall(EA_8BYTE); - insFormat fmt = emitInsFormat(ins); - - if (ins != INS_BREAKPOINT) - { - assert(fmt == IF_SN_0A); - } - - id->idIns(ins); - id->idInsFmt(fmt); - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction with a single immediate value. - */ - -void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) -{ - insFormat fmt = IF_NONE; - - /* Figure out the encoding format of the instruction */ - if (ins == INS_BREAKPOINT) - { - if ((imm & 0x0000ffff) == imm) - { - fmt = IF_SI_0A; - } - else - { - assert(!"Instruction cannot be encoded: IF_SI_0A"); - } - } - else - { - // fallback to emit SVE instructions. - return emitInsSve_I(ins, attr, imm); - } - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSC(attr, imm); - - id->idIns(ins); - id->idInsFmt(fmt); - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing a single register. - */ - -void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */) -{ - insFormat fmt = IF_NONE; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_br: - case INS_ret: - assert(isGeneralRegister(reg)); - fmt = IF_BR_1A; - break; - - case INS_dczva: - assert(isGeneralRegister(reg)); - assert(attr == EA_8BYTE); - fmt = IF_SR_1A; - break; - - case INS_mrs_tpid0: - fmt = IF_SR_1A; - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R(ins, attr, reg, opt); - } - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSmall(attr); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(reg); - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing a register and a constant. - */ - -void emitter::emitIns_R_I(instruction ins, - emitAttr attr, - regNumber reg, - ssize_t imm, - insOpts opt, /* = INS_OPTS_NONE */ - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */ - DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */)) -{ - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool canEncode = false; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - bitMaskImm bmi; - halfwordImm hwi; - byteShiftedImm bsi; - ssize_t notOfImm; - - case INS_tst: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg)); - bmi.immNRS = 0; - canEncode = canEncodeBitMaskImm(imm, size, &bmi); - if (canEncode) - { - imm = bmi.immNRS; - assert(isValidImmNRS(imm, size)); - fmt = IF_DI_1C; - } - break; - - case INS_movk: - case INS_movn: - case INS_movz: - assert(isValidGeneralDatasize(size)); - assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed) - assert(isGeneralRegister(reg)); - assert(isValidUimm<16>(imm)); - - hwi.immHW = 0; - hwi.immVal = imm; - assert(imm == emitDecodeHalfwordImm(hwi, size)); - - imm = hwi.immHWVal; - canEncode = true; - fmt = IF_DI_1B; - break; - - case INS_mov: - assert(isValidGeneralDatasize(size)); - assert(insOptsNone(opt)); // No explicit LSL here - // We will automatically determine the shift based upon the imm - - // First try the standard 'halfword immediate' imm(i16,hw) - hwi.immHWVal = 0; - canEncode = canEncodeHalfwordImm(imm, size, &hwi); - if (canEncode) - { - // uses a movz encoding - assert(isGeneralRegister(reg)); - 
imm = hwi.immHWVal; - assert(isValidImmHWVal(imm, size)); - fmt = IF_DI_1B; - break; - } - - // Next try the ones-complement form of 'halfword immediate' imm(i16,hw) - notOfImm = NOT_helper(imm, getBitWidth(size)); - canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi); - if (canEncode) - { - assert(isGeneralRegister(reg)); - imm = hwi.immHWVal; - ins = INS_movn; // uses a movn encoding - assert(isValidImmHWVal(imm, size)); - fmt = IF_DI_1B; - break; - } - - // Finally try the 'bitmask immediate' imm(N,r,s) - bmi.immNRS = 0; - canEncode = canEncodeBitMaskImm(imm, size, &bmi); - if (canEncode) - { - assert(isGeneralRegisterOrSP(reg)); - reg = encodingSPtoZR(reg); - imm = bmi.immNRS; - assert(isValidImmNRS(imm, size)); - fmt = IF_DI_1D; - break; - } - else - { - assert(!"Instruction cannot be encoded: mov imm"); - } - - break; - - case INS_movi: - assert(isValidVectorDatasize(size)); - assert(isVectorRegister(reg)); - if (insOptsNone(opt) && (size == EA_8BYTE)) - { - opt = INS_OPTS_1D; - } - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - - if (elemsize == EA_8BYTE) - { - size_t uimm = imm; - ssize_t imm8 = 0; - unsigned pos = 0; - canEncode = true; - while (uimm != 0) - { - INT64 loByte = uimm & 0xFF; - if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8)) - { - if (loByte == 0xFF) - { - imm8 |= (ssize_t{1} << pos); - } - uimm >>= 8; - pos++; - } - else - { - canEncode = false; - break; - } - } - imm = imm8; - assert(isValidUimm<8>(imm)); - fmt = IF_DV_1B; - break; - } - else - { - // Vector operation - - // No explicit LSL/MSL is used for the immediate - // We will automatically determine the shift based upon the value of imm - - // First try the standard 'byteShifted immediate' imm(i8,bySh) - bsi.immBSVal = 0; - canEncode = canEncodeByteShiftedImm(imm, elemsize, true, &bsi); - if (canEncode) - { - imm = bsi.immBSVal; - assert(isValidImmBSVal(imm, size)); - fmt = IF_DV_1B; - break; - } - - // Next try the ones-complement form of the 
'immediate' imm(i8,bySh) - if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms - { - notOfImm = NOT_helper(imm, getBitWidth(elemsize)); - canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi); - if (canEncode) - { - imm = bsi.immBSVal; - ins = INS_mvni; // uses a mvni encoding - assert(isValidImmBSVal(imm, size)); - fmt = IF_DV_1B; - break; - } - } - } - break; - - case INS_orr: - case INS_bic: - case INS_mvni: - assert(isValidVectorDatasize(size)); - assert(isVectorRegister(reg)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms - - // Vector operation - - // No explicit LSL/MSL is used for the immediate - // We will automatically determine the shift based upon the value of imm - - // First try the standard 'byteShifted immediate' imm(i8,bySh) - bsi.immBSVal = 0; - canEncode = canEncodeByteShiftedImm(imm, elemsize, - (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL) - &bsi); - if (canEncode) - { - imm = bsi.immBSVal; - assert(isValidImmBSVal(imm, size)); - fmt = IF_DV_1B; - break; - } - break; - - case INS_cmp: - case INS_cmn: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg)); - - if (unsigned_abs(imm) <= 0x0fff) - { - if (imm < 0) - { - ins = insReverse(ins); - imm = -imm; - } - assert(isValidUimm<12>(imm)); - canEncode = true; - fmt = IF_DI_1A; - } - else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding - { - // Encoding will use a 12-bit left shift of the immediate - opt = INS_OPTS_LSL12; - if (imm < 0) - { - ins = insReverse(ins); - imm = -imm; - } - assert((imm & 0xfff) == 0); - imm >>= 12; - assert(isValidUimm<12>(imm)); - canEncode = true; - fmt = IF_DI_1A; - } - else - { - assert(!"Instruction cannot be encoded: IF_DI_1A"); - } - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_I(ins, attr, reg, imm, opt, sopt); - } // end switch (ins) - - assert(canEncode); - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSC(attr, imm); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg); - -#ifdef DEBUG - id->idDebugOnlyInfo()->idMemCookie = targetHandle; - id->idDebugOnlyInfo()->idFlags = gtFlags; -#endif - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing a register and a floating point constant. - */ - -void emitter::emitIns_R_F( - instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */) - -{ - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - ssize_t imm = 0; - bool canEncode = false; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - floatImm8 fpi; - - case INS_fcmp: - case INS_fcmpe: - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - assert(isVectorRegister(reg)); - if (immDbl == 0.0) - { - canEncode = true; - fmt = IF_DV_1C; - } - break; - - case INS_fmov: - assert(isVectorRegister(reg)); - fpi.immFPIVal = 0; - canEncode = canEncodeFloatImm8(immDbl, &fpi); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - - if (canEncode) - { - imm = fpi.immFPIVal; - assert((imm >= 0) && (imm <= 0xff)); - fmt = IF_DV_1B; - } - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - - if (canEncode) - { - imm = fpi.immFPIVal; - assert((imm >= 0) && (imm <= 0xff)); - fmt = IF_DV_1A; - } - } - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_F(ins, attr, reg, immDbl, opt); - - } // end switch (ins) - - assert(canEncode); - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSC(attr, imm); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg); - - dispIns(id); - appendToCurIG(id); -} - -//------------------------------------------------------------------------ -// emitIns_Mov: Emits a move instruction -// -// Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// dstReg -- The destination register -// srcReg -- The source register -// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false -// insOpts -- The instruction options -// -void emitter::emitIns_Mov( - instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) -{ - assert(IsMovInstruction(ins)); - - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_mov: - { - assert(insOptsNone(opt)); - - if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) - { - // These instructions have no side effect and can be skipped - return; - } - - // Check for the 'mov' aliases for the vector registers - if (isVectorRegister(dstReg)) - { - if (isVectorRegister(srcReg) && isValidVectorDatasize(size)) - { - return emitIns_R_R_R(INS_mov, size, dstReg, srcReg, srcReg); - } - else - { - return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); - } - } - else - { - if (isVectorRegister(srcReg)) - { - assert(isGeneralRegister(dstReg)); - return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); - } - } - - // Is this a MOV to/from SP instruction? 
- if ((dstReg == REG_SP) || (srcReg == REG_SP)) - { - assert(isGeneralRegisterOrSP(dstReg)); - assert(isGeneralRegisterOrSP(srcReg)); - dstReg = encodingSPtoZR(dstReg); - srcReg = encodingSPtoZR(srcReg); - fmt = IF_DR_2G; - } - else - { - assert(insOptsNone(opt)); - assert(isGeneralRegister(dstReg)); - assert(isGeneralRegisterOrZR(srcReg)); - fmt = IF_DR_2E; - } - break; - } - - case INS_sxtw: - { - assert((size == EA_8BYTE) || (size == EA_4BYTE)); - FALLTHROUGH; - } - - case INS_sxtb: - case INS_sxth: - case INS_uxtb: - case INS_uxth: - { - if (canSkip && (dstReg == srcReg)) - { - // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided - return; - } - - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(dstReg)); - assert(isGeneralRegister(srcReg)); - fmt = IF_DR_2H; - break; - } - - case INS_fmov: - { - assert(isValidVectorElemsizeFloat(size)); - - if (canSkip && (dstReg == srcReg)) - { - // These instructions have no side effect and can be skipped - return; - } - - if (isVectorRegister(dstReg)) - { - if (isVectorRegister(srcReg)) - { - assert(insOptsNone(opt)); - fmt = IF_DV_2G; - } - else - { - assert(isGeneralRegister(srcReg)); - - // if the optional conversion specifier is not present we calculate it - if (opt == INS_OPTS_NONE) - { - opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D; - } - assert(insOptsConvertIntToFloat(opt)); - - fmt = IF_DV_2I; - } - } - else - { - assert(isGeneralRegister(dstReg)); - assert(isVectorRegister(srcReg)); - - // if the optional conversion specifier is not present we calculate it - if (opt == INS_OPTS_NONE) - { - opt = (size == EA_4BYTE) ? 
INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE; - } - assert(insOptsConvertFloatToInt(opt)); - - fmt = IF_DV_2H; - } - break; - } - - default: - { - unreached(); - } - } - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSmall(attr); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(dstReg); - id->idReg2(srcReg); - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing two registers - */ - -void emitter::emitIns_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - insOpts opt /* = INS_OPTS_NONE */, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) -{ - if (IsMovInstruction(ins)) - { - assert(!"Please use emitIns_Mov() to correctly handle move elision"); - emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, opt); - } - - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_dup: - // Vector operation - assert(insOptsAnyArrangement(opt)); - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2C; - break; - - case INS_abs: - case INS_not: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - if (ins == INS_not) - { - assert(isValidVectorDatasize(size)); - // Bitwise behavior is independent of element size, but is always encoded as 1 Byte - opt = optMakeArrangement(size, EA_1BYTE); - } - if (insOptsNone(opt)) - { - // Scalar operation - assert(size == EA_8BYTE); // Only type D is supported - fmt = IF_DV_2L; - } - else - { - // Vector operation - assert(insOptsAnyArrangement(opt)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - fmt = 
IF_DV_2M; - } - break; - - case INS_mvn: - case INS_neg: - if (isVectorRegister(reg1)) - { - assert(isVectorRegister(reg2)); - if (ins == INS_mvn) - { - assert(isValidVectorDatasize(size)); - // Bitwise behavior is independent of element size, but is always encoded as 1 Byte - opt = optMakeArrangement(size, EA_1BYTE); - } - if (insOptsNone(opt)) - { - // Scalar operation - assert(size == EA_8BYTE); // Only type D is supported - fmt = IF_DV_2L; - } - else - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - fmt = IF_DV_2M; - } - break; - } - FALLTHROUGH; - - case INS_negs: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - fmt = IF_DR_2E; - break; - - case INS_sxtl: - case INS_sxtl2: - case INS_uxtl: - case INS_uxtl2: - return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt); - - case INS_cls: - case INS_clz: - case INS_rbit: - case INS_rev16: - case INS_rev32: - case INS_cnt: - if (isVectorRegister(reg1)) - { - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - if ((ins == INS_cls) || (ins == INS_clz)) - { - assert(elemsize != EA_8BYTE); // No encoding for type D - } - else if (ins == INS_rev32) - { - assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE)); - } - else - { - assert(elemsize == EA_1BYTE); // Only supports 8B or 16B - } - fmt = IF_DV_2M; - break; - } - if (ins == INS_cnt) - { - // Doesn't have general register version(s) - break; - } - - FALLTHROUGH; - - case INS_rev: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - if (ins == INS_rev32) - { - assert(size == EA_8BYTE); - } - else - { - assert(isValidGeneralDatasize(size)); - } - fmt = IF_DR_2G; - break; - - case INS_addv: - case INS_saddlv: - case INS_smaxv: - case INS_sminv: - case INS_uaddlv: - case INS_umaxv: - case 
INS_uminv: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - assert((opt != INS_OPTS_2S) && (opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // Reserved encodings - fmt = IF_DV_2T; - break; - - case INS_rev64: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(elemsize != EA_8BYTE); // No encoding for type D - fmt = IF_DV_2M; - break; - - case INS_sqxtn: - case INS_sqxtun: - case INS_uqxtn: - if (insOptsNone(opt)) - { - // Scalar operation - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorElemsize(size)); - assert(size != EA_8BYTE); // The encoding size = 11 is reserved. - fmt = IF_DV_2L; - break; - } - FALLTHROUGH; - - case INS_xtn: - // Vector operation - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(size == EA_8BYTE); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved - fmt = IF_DV_2M; - break; - - case INS_sqxtn2: - case INS_sqxtun2: - case INS_uqxtn2: - case INS_xtn2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(size == EA_16BYTE); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved - fmt = IF_DV_2M; - break; - - case INS_ldar: - case INS_ldapr: - case INS_ldaxr: - case INS_ldxr: - case INS_stlr: - assert(isValidGeneralDatasize(size)); - - FALLTHROUGH; - - case INS_ldarb: - case INS_ldaprb: - case INS_ldaxrb: - case INS_ldxrb: - case INS_ldarh: - case INS_ldaprh: - case INS_ldaxrh: - case INS_ldxrh: - case INS_stlrb: - case INS_stlrh: - assert(isValidGeneralLSDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - assert(insOptsNone(opt)); - - reg2 = 
encodingSPtoZR(reg2); - - fmt = IF_LS_2A; - break; - - case INS_ldr: - case INS_ldrb: - case INS_ldrh: - case INS_ldrsb: - case INS_ldrsh: - case INS_ldrsw: - case INS_str: - case INS_strb: - case INS_strh: - case INS_cmn: - case INS_tst: - assert(insOptsNone(opt)); - emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE); - return; - - case INS_cmp: - emitIns_R_R_I(ins, attr, reg1, reg2, 0, opt); - return; - - case INS_staddb: - emitIns_R_R_R(INS_ldaddb, attr, reg1, REG_ZR, reg2); - return; - case INS_staddlb: - emitIns_R_R_R(INS_ldaddlb, attr, reg1, REG_ZR, reg2); - return; - case INS_staddh: - emitIns_R_R_R(INS_ldaddh, attr, reg1, REG_ZR, reg2); - return; - case INS_staddlh: - emitIns_R_R_R(INS_ldaddlh, attr, reg1, REG_ZR, reg2); - return; - case INS_stadd: - emitIns_R_R_R(INS_ldadd, attr, reg1, REG_ZR, reg2); - return; - case INS_staddl: - emitIns_R_R_R(INS_ldaddl, attr, reg1, REG_ZR, reg2); - return; - - case INS_fcmp: - case INS_fcmpe: - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2K; - break; - - case INS_fcvtns: - case INS_fcvtnu: - case INS_fcvtas: - case INS_fcvtau: - case INS_fcvtps: - case INS_fcvtpu: - case INS_fcvtms: - case INS_fcvtmu: - case INS_fcvtzs: - case INS_fcvtzu: - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2A; - } - else - { - // Scalar operation - assert(isVectorRegister(reg2)); - if (isVectorRegister(reg1)) - { - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - fmt = IF_DV_2G; - } - else - { - assert(isGeneralRegister(reg1)); - assert(insOptsConvertFloatToInt(opt)); - 
assert(isValidVectorElemsizeFloat(size)); - fmt = IF_DV_2H; - } - } - break; - - case INS_fcvtl: - case INS_fcvtn: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); - fmt = IF_DV_2A; - break; - - case INS_fcvtl2: - case INS_fcvtn2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(size == EA_16BYTE); - assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); - fmt = IF_DV_2A; - break; - - case INS_fcvtxn: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(size == EA_8BYTE); - assert(opt == INS_OPTS_2S); - fmt = IF_DV_2A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(size == EA_4BYTE); - fmt = IF_DV_2G; - } - break; - - case INS_fcvtxn2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(size == EA_16BYTE); - assert(opt == INS_OPTS_4S); - fmt = IF_DV_2A; - break; - - case INS_scvtf: - case INS_ucvtf: - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2A; - } - else - { - // Scalar operation - assert(isVectorRegister(reg1)); - if (isVectorRegister(reg2)) - { - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - fmt = IF_DV_2G; - } - else - { - assert(isGeneralRegister(reg2)); - assert(insOptsConvertIntToFloat(opt)); - assert(isValidVectorElemsizeFloat(size)); - fmt = IF_DV_2I; - } - } - break; - - case INS_fabs: - case INS_fneg: - case INS_fsqrt: - case INS_frinta: - case INS_frinti: - case INS_frintm: - case INS_frintn: - case INS_frintp: - case INS_frintx: - case INS_frintz: - if 
(insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsizeFloat(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2G; - } - break; - - case INS_faddp: - case INS_fmaxnmp: - case INS_fmaxp: - case INS_fminnmp: - case INS_fminp: - // Scalar operation - assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_2D))); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2Q; - break; - - case INS_fmaxnmv: - case INS_fmaxv: - case INS_fminnmv: - case INS_fminv: - assert(size == EA_16BYTE); - assert(opt == INS_OPTS_4S); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2R; - break; - - case INS_addp: - assert(size == EA_16BYTE); - assert(opt == INS_OPTS_2D); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2S; - break; - - case INS_fcvt: - assert(insOptsConvertFloatToFloat(opt)); - assert(isValidVectorFcvtsize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2J; - break; - - case INS_cmeq: - case INS_cmge: - case INS_cmgt: - case INS_cmle: - case INS_cmlt: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - fmt = IF_DV_2M; - } - else - { - // Scalar operation - assert(size == EA_8BYTE); - assert(insOptsNone(opt)); - fmt = IF_DV_2L; - } - break; - - case INS_fcmeq: - case INS_fcmge: - case 
INS_fcmgt: - case INS_fcmle: - case INS_fcmlt: - case INS_frecpe: - case INS_frsqrte: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); // Only Double/Float supported - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2A; - } - else - { - // Scalar operation - assert(isValidScalarDatasize(size)); // Only Double/Float supported - assert(insOptsNone(opt)); - fmt = IF_DV_2G; - } - break; - - case INS_aesd: - case INS_aese: - case INS_aesmc: - case INS_aesimc: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - elemsize = optGetElemsize(opt); - assert(elemsize == EA_1BYTE); - fmt = IF_DV_2P; - break; - - case INS_sha1h: - assert(insOptsNone(opt)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - fmt = IF_DV_2U; - break; - - case INS_sha256su0: - case INS_sha1su1: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - elemsize = optGetElemsize(opt); - assert(elemsize == EA_4BYTE); - fmt = IF_DV_2P; - break; - - case INS_ld2: - case INS_ld3: - case INS_ld4: - case INS_st2: - case INS_st3: - case INS_st4: - assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 - FALLTHROUGH; - - case INS_ld1: - case INS_ld1_2regs: - case INS_ld1_3regs: - case INS_ld1_4regs: - case INS_st1: - case INS_st1_2regs: - case INS_st1_3regs: - case INS_st1_4regs: - case INS_ld1r: - case INS_ld2r: - case INS_ld3r: - case INS_ld4r: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - - // Load/Store multiple structures base register - // Load single structure and replicate base register - reg2 = 
encodingSPtoZR(reg2); - fmt = IF_LS_2D; - break; - - case INS_urecpe: - case INS_ursqrte: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(elemsize == EA_4BYTE); - fmt = IF_DV_2A; - break; - - case INS_frecpx: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidScalarDatasize(size)); - assert(insOptsNone(opt)); - fmt = IF_DV_2G; - break; - - case INS_sadalp: - case INS_saddlp: - case INS_uadalp: - case INS_uaddlp: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isValidArrangement(size, opt)); - assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved - fmt = IF_DV_2T; - break; - - case INS_sqabs: - case INS_sqneg: - case INS_suqadd: - case INS_usqadd: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved - fmt = IF_DV_2M; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - fmt = IF_DV_2L; - } - break; - - default: - // fallback to emit SVE instructions. - return emitInsSve_R_R(ins, attr, reg1, reg2, opt, sopt); - - } // end switch (ins) - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSmall(attr); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg1); - id->idReg2(reg2); - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing a register and two constants. 
- */ - -void emitter::emitIns_R_I_I(instruction ins, - emitAttr attr, - regNumber reg, - ssize_t imm1, - ssize_t imm2, - insOpts opt /* = INS_OPTS_NONE */ - DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = 0 */)) -{ - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - bool canEncode; - halfwordImm hwi; - - case INS_mov: - ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL - FALLTHROUGH; - - case INS_movk: - case INS_movn: - case INS_movz: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg)); - assert(isValidUimm<16>(imm1)); - assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL - - if (size == EA_8BYTE) - { - assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48 - (imm2 == 32) || (imm2 == 48)); - } - else // EA_4BYTE - { - assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16 - } - - hwi.immHWVal = 0; - - switch (imm2) - { - case 0: - hwi.immHW = 0; - canEncode = true; - break; - - case 16: - hwi.immHW = 1; - canEncode = true; - break; - - case 32: - hwi.immHW = 2; - canEncode = true; - break; - - case 48: - hwi.immHW = 3; - canEncode = true; - break; - - default: - canEncode = false; - } - - if (canEncode) - { - hwi.immVal = imm1; - - immOut = hwi.immHWVal; - assert(isValidImmHWVal(immOut, size)); - fmt = IF_DI_1B; - } - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_I_I(ins, attr, reg, imm1, imm2, opt); - - } // end switch (ins) - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSC(attr, immOut); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg); - -#ifdef DEBUG - id->idDebugOnlyInfo()->idFlags = gtFlags; - id->idDebugOnlyInfo()->idMemCookie = targetHandle; -#endif - - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing two registers and a constant. - */ - -void emitter::emitIns_R_R_I(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - ssize_t imm, - insOpts opt /* = INS_OPTS_NONE */, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) -{ - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool isLdSt = false; - bool isLdrStr = false; - bool isSIMD = false; - bool isAddSub = false; - bool setFlags = false; - unsigned scale = 0; - bool unscaledOp = false; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - bool canEncode; - bitMaskImm bmi; - unsigned registerListSize; - bool isRightShift; - - case INS_mov: - // Check for the 'mov' aliases for the vector registers - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - elemsize = size; - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - - if (isVectorRegister(reg1)) - { - if (isGeneralRegisterOrZR(reg2)) - { - fmt = IF_DV_2C; // Alias for 'ins' - break; - } - else if (isVectorRegister(reg2)) - { - fmt = IF_DV_2E; // Alias for 'dup' - break; - } - } - else // isGeneralRegister(reg1) - { - assert(isGeneralRegister(reg1)); - if (isVectorRegister(reg2)) - { - fmt = IF_DV_2B; // Alias for 'umov' - break; - } - } - assert(!" 
invalid INS_mov operands"); - break; - - case INS_lsl: - case INS_lsr: - case INS_asr: - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isValidImmShift(imm, size)); - fmt = IF_DI_2D; - break; - - case INS_ror: - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isValidImmShift(imm, size)); - fmt = IF_DI_2B; - break; - - case INS_shl: - case INS_sli: - case INS_sri: - case INS_srshr: - case INS_srsra: - case INS_sshr: - case INS_ssra: - case INS_urshr: - case INS_ursra: - case INS_ushr: - case INS_usra: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - isRightShift = emitInsIsVectorRightShift(ins); - - assert(!isRightShift || - (imm != 0 && "instructions for vector right-shift do not allow zero as an immediate value")); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2O; - break; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(size == EA_8BYTE); // only supported size - assert(isValidVectorShiftAmount(imm, size, isRightShift)); - fmt = IF_DV_2N; - } - break; - - case INS_sqshl: - case INS_uqshl: - case INS_sqshlu: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - isRightShift = emitInsIsVectorRightShift(ins); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding immh = 1xxx, Q = 0 is reserved - elemsize = optGetElemsize(opt); - assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); - fmt = IF_DV_2O; - } - else - { - 
// Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - assert(isValidVectorShiftAmount(imm, size, isRightShift)); - fmt = IF_DV_2N; - } - break; - - case INS_sqrshrn: - case INS_sqrshrun: - case INS_sqshrn: - case INS_sqshrun: - case INS_uqrshrn: - case INS_uqshrn: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - isRightShift = emitInsIsVectorRightShift(ins); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidArrangement(size, opt)); - assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding immh = 1xxx, Q = x is reserved - elemsize = optGetElemsize(opt); - assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); - fmt = IF_DV_2O; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - assert(size != EA_8BYTE); // The encoding immh = 1xxx is reserved - assert(isValidVectorShiftAmount(imm, size, isRightShift)); - fmt = IF_DV_2N; - } - break; - - case INS_sxtl: - case INS_uxtl: - assert(imm == 0); - FALLTHROUGH; - - case INS_rshrn: - case INS_shrn: - case INS_sshll: - case INS_ushll: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - isRightShift = emitInsIsVectorRightShift(ins); - // Vector operation - assert(size == EA_8BYTE); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(elemsize != EA_8BYTE); // Reserved encodings - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); - fmt = IF_DV_2O; - break; - - case INS_sxtl2: - case INS_uxtl2: - assert(imm == 0); - FALLTHROUGH; - - case INS_rshrn2: - case INS_shrn2: - case INS_sqrshrn2: - case INS_sqrshrun2: - case INS_sqshrn2: - case INS_sqshrun2: - case INS_sshll2: - case INS_uqrshrn2: - case INS_uqshrn2: - case INS_ushll2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - isRightShift = emitInsIsVectorRightShift(ins); - - // Vector operation - 
assert(size == EA_16BYTE); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(elemsize != EA_8BYTE); // The encoding immh = 1xxx, Q = x is reserved - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); - fmt = IF_DV_2O; - break; - - case INS_mvn: - case INS_neg: - case INS_negs: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - - if (imm == 0) - { - assert(insOptsNone(opt)); // a zero imm, means no alu shift kind - - fmt = IF_DR_2E; - } - else - { - if (ins == INS_mvn) - { - assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind - } - else // neg or negs - { - assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR - } - assert(isValidImmShift(imm, size)); - fmt = IF_DR_2F; - } - break; - - case INS_tst: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegister(reg2)); - - if (insOptsAnyShift(opt)) - { - assert(isValidImmShift(imm, size) && (imm != 0)); - fmt = IF_DR_2B; - } - else - { - assert(insOptsNone(opt)); // a zero imm, means no alu shift kind - assert(imm == 0); - fmt = IF_DR_2A; - } - break; - - case INS_cmp: - case INS_cmn: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrSP(reg1)); - assert(isGeneralRegister(reg2)); - - reg1 = encodingSPtoZR(reg1); - if (insOptsAnyExtend(opt)) - { - assert((imm >= 0) && (imm <= 4)); - - fmt = IF_DR_2C; - } - else if (imm == 0) - { - assert(insOptsNone(opt)); // a zero imm, means no alu shift kind - - fmt = IF_DR_2A; - } - else - { - assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind - assert(isValidImmShift(imm, size)); - fmt = IF_DR_2B; - } - break; - - case INS_ands: - case INS_and: - case INS_eor: - case INS_orr: - assert(insOptsNone(opt)); - assert(isGeneralRegister(reg2)); - if (ins == INS_ands) - { - 
assert(isGeneralRegister(reg1)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - reg1 = encodingSPtoZR(reg1); - } - - bmi.immNRS = 0; - canEncode = canEncodeBitMaskImm(imm, size, &bmi); - if (canEncode) - { - imm = bmi.immNRS; - assert(isValidImmNRS(imm, size)); - fmt = IF_DI_2C; - } - break; - - case INS_dup: // by element, imm selects the element of reg2 - assert(isVectorRegister(reg1)); - if (isVectorRegister(reg2)) - { - if (insOptsAnyArrangement(opt)) - { - // The size and opt were modified to be based on the - // return type but the immediate is based on the operand - // which can be of a larger size. As such, we don't - // assert the index is valid here and instead do it in - // codegen. - - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsize(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_2D; - break; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - elemsize = size; - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_2E; - break; - } - } - FALLTHROUGH; - - case INS_ins: // (MOV from general) - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - elemsize = size; - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_2C; - break; - - case INS_umov: // (MOV to general) - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - assert(isGeneralRegister(reg1)); - assert(isVectorRegister(reg2)); - elemsize = size; - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_2B; - break; - - case INS_smov: - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - assert(size != EA_8BYTE); // no encoding, use INS_umov - assert(isGeneralRegister(reg1)); - assert(isVectorRegister(reg2)); - elemsize = size; - 
assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_2B; - break; - - case INS_add: - case INS_sub: - setFlags = false; - isAddSub = true; - break; - - case INS_adds: - case INS_subs: - setFlags = true; - isAddSub = true; - break; - - case INS_ldrsb: - case INS_ldursb: - // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register - assert(isValidGeneralDatasize(size)); - unscaledOp = (ins == INS_ldursb); - scale = 0; - isLdSt = true; - break; - - case INS_ldrsh: - case INS_ldursh: - // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register - assert(isValidGeneralDatasize(size)); - unscaledOp = (ins == INS_ldursh); - scale = 1; - isLdSt = true; - break; - - case INS_ldrsw: - case INS_ldursw: - // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register - assert(size == EA_8BYTE); - unscaledOp = (ins == INS_ldursw); - scale = 2; - isLdSt = true; - break; - - case INS_ldrb: - case INS_strb: - // size is ignored - unscaledOp = false; - scale = 0; - isLdSt = true; - break; - - case INS_ldapurb: - case INS_stlurb: - case INS_ldurb: - case INS_sturb: - // size is ignored - unscaledOp = true; - scale = 0; - isLdSt = true; - break; - - case INS_ldrh: - case INS_strh: - // size is ignored - unscaledOp = false; - scale = 1; - isLdSt = true; - break; - - case INS_ldurh: - case INS_ldapurh: - case INS_sturh: - case INS_stlurh: - // size is ignored - unscaledOp = true; - scale = 0; - isLdSt = true; - break; - - case INS_ldr: - case INS_str: - // Is the target a vector register? - if (isVectorRegister(reg1)) - { - assert(isValidVectorLSDatasize(size)); - assert(isGeneralRegisterOrSP(reg2)); - isSIMD = true; - } - else - { - assert(isValidGeneralDatasize(size)); - } - unscaledOp = false; - scale = NaturalScale_helper(size); - isLdSt = true; - isLdrStr = true; - break; - - case INS_ldur: - case INS_stur: - case INS_ldapur: - case INS_stlur: - // Is the target a vector register? 
- if (isVectorRegister(reg1)) - { - assert(isValidVectorLSDatasize(size)); - assert(isGeneralRegisterOrSP(reg2)); - isSIMD = true; - } - else - { - assert(isValidGeneralDatasize(size)); - } - unscaledOp = true; - scale = 0; - isLdSt = true; - break; - - case INS_ld2: - case INS_ld3: - case INS_ld4: - case INS_st2: - case INS_st3: - case INS_st4: - assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 - FALLTHROUGH; - - case INS_ld1: - case INS_ld1_2regs: - case INS_ld1_3regs: - case INS_ld1_4regs: - case INS_st1: - case INS_st1_2regs: - case INS_st1_3regs: - case INS_st1_4regs: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - - reg2 = encodingSPtoZR(reg2); - - if (insOptsAnyArrangement(opt)) - { - registerListSize = insGetRegisterListSize(ins); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - assert((size * registerListSize) == imm); - - // Load/Store multiple structures post-indexed by an immediate - fmt = IF_LS_2E; - } - else - { - assert(insOptsNone(opt)); - assert((ins != INS_ld1_2regs) && (ins != INS_ld1_3regs) && (ins != INS_ld1_4regs) && - (ins != INS_st1_2regs) && (ins != INS_st1_3regs) && (ins != INS_st1_4regs)); - - elemsize = size; - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - - // Load/Store single structure base register - fmt = IF_LS_2F; - } - break; - - case INS_ld1r: - case INS_ld2r: - case INS_ld3r: - case INS_ld4r: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - - elemsize = optGetElemsize(opt); - registerListSize = insGetRegisterListSize(ins); - assert((elemsize * registerListSize) == imm); - - // Load single structure and replicate post-indexed by an immediate - reg2 = encodingSPtoZR(reg2); - fmt = IF_LS_2E; - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_R_I(ins, attr, reg1, reg2, imm, opt, sopt); - - } // end switch (ins) - - if (isLdSt) - { - assert(!isAddSub); - - if (isSIMD) - { - assert(isValidVectorLSDatasize(size)); - assert(isVectorRegister(reg1)); - assert((scale >= 0) && (scale <= 4)); - } - else - { - assert(isValidGeneralLSDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert((scale >= 0) && (scale <= 3)); - } - - assert(isGeneralRegisterOrSP(reg2)); - - // Load/Store reserved encodings: - if (insOptsIndexed(opt)) - { - assert(reg1 != reg2); - } - - reg2 = encodingSPtoZR(reg2); - - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0 || EA_IS_CNS_TLSGD_RELOC(attr)) - { - assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero - - fmt = IF_LS_2A; - } - else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0)) - { - if (isValidSimm<9>(imm)) - { - fmt = IF_LS_2C; - } - else - { - assert(!"Instruction cannot be encoded: IF_LS_2C"); - } - } - else if (imm > 0) - { - assert(insOptsNone(opt)); - assert(!unscaledOp); - - if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - - fmt = IF_LS_2B; - } - else - { - assert(!"Instruction cannot be encoded: IF_LS_2B"); - } - } - - // Try to optimize a load/store with an alternative instruction. - if (isLdrStr && emitComp->opts.OptimizationEnabled() && - OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, false, -1, -1 DEBUG_ARG(false))) - { - return; - } - } - else if (isAddSub) - { - assert(!isLdSt); - assert(insOptsNone(opt)); - - if (setFlags) // Can't encode SP with setFlags - { - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - - // Is it just a mov? 
- if (imm == 0) - { - emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true); - return; - } - - reg1 = encodingSPtoZR(reg1); - reg2 = encodingSPtoZR(reg2); - } - - if (unsigned_abs(imm) <= 0x0fff) - { - if (imm < 0) - { - ins = insReverse(ins); - imm = -imm; - } - assert(isValidUimm<12>(imm)); - fmt = IF_DI_2A; - } - else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding - { - // Encoding will use a 12-bit left shift of the immediate - opt = INS_OPTS_LSL12; - if (imm < 0) - { - ins = insReverse(ins); - imm = -imm; - } - assert((imm & 0xfff) == 0); - imm >>= 12; - assert(isValidUimm<12>(imm)); - fmt = IF_DI_2A; - } - else - { - assert(!"Instruction cannot be encoded: IF_DI_2A"); - } - } - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrSC(attr, imm); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg1); - id->idReg2(reg2); - - if (EA_IS_CNS_TLSGD_RELOC(attr)) - { - assert(imm != 0); - id->idSetTlsGD(); - } - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing two registers and a floating point constant. - */ - -void emitter::emitIns_R_R_F( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */) -{ - // Currently, only SVE instructions use this format. - emitInsSve_R_R_F(ins, attr, reg1, reg2, immDbl, opt); -} - -/***************************************************************************** -* -* Add an instruction referencing two registers and a constant. 
-* Also checks for a large immediate that needs a second instruction -* and will load it in reg1 -* -* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr -* - Requires that reg1 is a general register and not SP or ZR -* - Requires that reg1 != reg2 -*/ -void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) -{ - assert(isGeneralRegister(reg1)); - assert(reg1 != reg2); - - bool immFits = true; - - switch (ins) - { - case INS_add: - case INS_adds: - case INS_sub: - case INS_subs: - immFits = emitter::emitIns_valid_imm_for_add(imm, attr); - break; - - case INS_ands: - case INS_and: - case INS_eor: - case INS_orr: - immFits = emitter::emitIns_valid_imm_for_alu(imm, attr); - break; - - default: - assert(!"Unsupported instruction in emitIns_R_R_Imm"); - } - - if (immFits) - { - emitIns_R_R_I(ins, attr, reg1, reg2, imm); - } - else - { - // Load 'imm' into the reg1 register - // then issue: 'ins' reg1, reg2, reg1 - // - codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); - emitIns_R_R_R(ins, attr, reg1, reg2, reg1); - } -} - -/***************************************************************************** - * - * Add an instruction referencing three registers. 
- */ - -void emitter::emitIns_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt /* = INS_OPTS_NONE */, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) -{ - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_mul: - case INS_smull: - case INS_umull: - if (insOptsAnyArrangement(opt)) - { - // ASIMD instruction - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidArrangement(size, opt)); - assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved - fmt = IF_DV_3A; - break; - } - // Base instruction - FALLTHROUGH; - - case INS_lsl: - case INS_lsr: - case INS_asr: - case INS_ror: - case INS_adc: - case INS_adcs: - case INS_sbc: - case INS_sbcs: - case INS_udiv: - case INS_sdiv: - case INS_mneg: - case INS_smnegl: - case INS_smulh: - case INS_umnegl: - case INS_umulh: - case INS_lslv: - case INS_lsrv: - case INS_asrv: - case INS_rorv: - case INS_crc32b: - case INS_crc32h: - case INS_crc32w: - case INS_crc32x: - case INS_crc32cb: - case INS_crc32ch: - case INS_crc32cw: - case INS_crc32cx: - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isGeneralRegister(reg3)); - fmt = IF_DR_3A; - break; - - case INS_add: - case INS_sub: - if (isVectorRegister(reg1)) - { - // ASIMD instruction - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(opt != INS_OPTS_1D); // Reserved encoding - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - fmt = IF_DV_3A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(size == EA_8BYTE); - fmt = IF_DV_3E; - } - break; - } - 
// Base instruction - FALLTHROUGH; - - case INS_adds: - case INS_subs: - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt); - return; - - case INS_cmeq: - case INS_cmge: - case INS_cmgt: - case INS_cmhi: - case INS_cmhs: - case INS_cmtst: - case INS_srshl: - case INS_sshl: - case INS_urshl: - case INS_ushl: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved - fmt = IF_DV_3A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(size == EA_8BYTE); // Only Int64/UInt64 supported - fmt = IF_DV_3E; - } - break; - - case INS_sqadd: - case INS_sqrshl: - case INS_sqshl: - case INS_sqsub: - case INS_uqadd: - case INS_uqrshl: - case INS_uqshl: - case INS_uqsub: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved - fmt = IF_DV_3A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidVectorElemsize(size)); - fmt = IF_DV_3E; - } - break; - - case INS_fcmeq: - case INS_fcmge: - case INS_fcmgt: - case INS_frecps: - case INS_frsqrts: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert((elemsize == EA_8BYTE) || (elemsize == EA_4BYTE)); // Only Double/Float supported - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_3B; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert((size == EA_8BYTE) || (size == EA_4BYTE)); // 
Only Double/Float supported - fmt = IF_DV_3D; - } - break; - - case INS_mla: - case INS_mls: - case INS_saba: - case INS_sabd: - case INS_shadd: - case INS_shsub: - case INS_smax: - case INS_smaxp: - case INS_smin: - case INS_sminp: - case INS_srhadd: - case INS_uaba: - case INS_uabd: - case INS_uhadd: - case INS_uhsub: - case INS_umax: - case INS_umaxp: - case INS_umin: - case INS_uminp: - case INS_urhadd: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidArrangement(size, opt)); - assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved - fmt = IF_DV_3A; - break; - - case INS_addp: - case INS_uzp1: - case INS_uzp2: - case INS_zip1: - case INS_zip2: - case INS_trn1: - case INS_trn2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved - fmt = IF_DV_3A; - break; - - case INS_mov: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(reg2 == reg3); - assert(isValidVectorDatasize(size)); - // INS_mov is an alias for INS_orr (vector register) - if (opt == INS_OPTS_NONE) - { - elemsize = EA_1BYTE; - opt = optMakeArrangement(size, elemsize); - } - assert(isValidArrangement(size, opt)); - fmt = IF_DV_3C; - break; - - case INS_and: - case INS_bic: - case INS_eor: - case INS_orr: - case INS_orn: - case INS_tbl: - case INS_tbl_2regs: - case INS_tbl_3regs: - case INS_tbl_4regs: - case INS_tbx: - case INS_tbx_2regs: - case INS_tbx_3regs: - case INS_tbx_4regs: - if (isVectorRegister(reg1)) - { - assert(isValidVectorDatasize(size)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (opt == INS_OPTS_NONE) - { - elemsize = EA_1BYTE; - opt = optMakeArrangement(size, elemsize); - } - assert(isValidArrangement(size, opt)); - fmt = IF_DV_3C; - break; - } - FALLTHROUGH; 
- - case INS_ands: - case INS_bics: - case INS_eon: - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE); - return; - - case INS_bsl: - case INS_bit: - case INS_bif: - assert(isValidVectorDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (opt == INS_OPTS_NONE) - { - elemsize = EA_1BYTE; - opt = optMakeArrangement(size, elemsize); - } - assert(isValidArrangement(size, opt)); - fmt = IF_DV_3C; - break; - - case INS_fadd: - case INS_fsub: - case INS_fdiv: - case INS_fmax: - case INS_fmaxnm: - case INS_fmin: - case INS_fminnm: - case INS_fabd: - case INS_fmul: - case INS_fmulx: - case INS_facge: - case INS_facgt: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_3B; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert(isValidScalarDatasize(size)); - fmt = IF_DV_3D; - } - break; - - case INS_fnmul: - // Scalar operation - assert(insOptsNone(opt)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidScalarDatasize(size)); - fmt = IF_DV_3D; - break; - - case INS_faddp: - case INS_fmaxnmp: - case INS_fmaxp: - case INS_fminnmp: - case INS_fminp: - - case INS_fmla: - case INS_fmls: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub' - - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(opt != 
INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_3B; - break; - - case INS_ldr: - case INS_ldrb: - case INS_ldrh: - case INS_ldrsb: - case INS_ldrsh: - case INS_ldrsw: - case INS_str: - case INS_strb: - case INS_strh: - emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt); - return; - - case INS_ldp: - case INS_ldpsw: - case INS_ldnp: - case INS_stp: - case INS_stnp: - emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0); - return; - - case INS_stxr: - case INS_stxrb: - case INS_stxrh: - case INS_stlxr: - case INS_stlxrb: - case INS_stlxrh: - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(isGeneralRegisterOrSP(reg3)); - fmt = IF_LS_3D; - break; - - case INS_casb: - case INS_casab: - case INS_casalb: - case INS_caslb: - case INS_cash: - case INS_casah: - case INS_casalh: - case INS_caslh: - case INS_cas: - case INS_casa: - case INS_casal: - case INS_casl: - case INS_ldaddb: - case INS_ldaddab: - case INS_ldaddalb: - case INS_ldaddlb: - case INS_ldaddh: - case INS_ldaddah: - case INS_ldaddalh: - case INS_ldaddlh: - case INS_ldadd: - case INS_ldadda: - case INS_ldaddal: - case INS_ldaddl: - case INS_ldclral: - case INS_ldsetal: - case INS_swpb: - case INS_swpab: - case INS_swpalb: - case INS_swplb: - case INS_swph: - case INS_swpah: - case INS_swpalh: - case INS_swplh: - case INS_swp: - case INS_swpa: - case INS_swpal: - case INS_swpl: - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(isGeneralRegisterOrSP(reg3)); - fmt = IF_LS_3E; - break; - - case INS_sha256h: - case INS_sha256h2: - case INS_sha256su1: - case INS_sha1su0: - case INS_sha1c: - case INS_sha1p: - case INS_sha1m: - assert(isValidVectorDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (opt == INS_OPTS_NONE) - { - elemsize = EA_4BYTE; - opt = optMakeArrangement(size, elemsize); - } - assert(isValidArrangement(size, opt)); - fmt = IF_DV_3F; - break; - - case INS_ld2: 
- case INS_ld3: - case INS_ld4: - case INS_st2: - case INS_st3: - case INS_st4: - assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 - FALLTHROUGH; - - case INS_ld1: - case INS_ld1_2regs: - case INS_ld1_3regs: - case INS_ld1_4regs: - case INS_st1: - case INS_st1_2regs: - case INS_st1_3regs: - case INS_st1_4regs: - case INS_ld1r: - case INS_ld2r: - case INS_ld3r: - case INS_ld4r: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidArrangement(size, opt)); - - // Load/Store multiple structures post-indexed by a register - // Load single structure and replicate post-indexed by a register - reg2 = encodingSPtoZR(reg2); - fmt = IF_LS_3F; - break; - - case INS_addhn: - case INS_raddhn: - case INS_rsubhn: - case INS_subhn: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_8BYTE); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved. - fmt = IF_DV_3A; - break; - - case INS_addhn2: - case INS_raddhn2: - case INS_rsubhn2: - case INS_subhn2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_16BYTE); - assert(isValidArrangement(size, opt)); - assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved. 
- fmt = IF_DV_3A; - break; - - case INS_sabal: - case INS_sabdl: - case INS_saddl: - case INS_saddw: - case INS_smlal: - case INS_smlsl: - case INS_ssubl: - case INS_ssubw: - case INS_uabal: - case INS_uabdl: - case INS_uaddl: - case INS_uaddw: - case INS_umlal: - case INS_umlsl: - case INS_usubl: - case INS_usubw: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); - fmt = IF_DV_3A; - break; - - case INS_sabal2: - case INS_sabdl2: - case INS_saddl2: - case INS_saddw2: - case INS_smlal2: - case INS_smlsl2: - case INS_ssubl2: - case INS_ssubw2: - case INS_umlal2: - case INS_umlsl2: - case INS_smull2: - case INS_uabal2: - case INS_uabdl2: - case INS_uaddl2: - case INS_uaddw2: - case INS_usubl2: - case INS_umull2: - case INS_usubw2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_16BYTE); - assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); - fmt = IF_DV_3A; - break; - - case INS_sqdmlal: - case INS_sqdmlsl: - case INS_sqdmull: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); - fmt = IF_DV_3A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert((size == EA_2BYTE) || (size == EA_4BYTE)); - fmt = IF_DV_3E; - } - break; - - case INS_sqdmulh: - case INS_sqrdmlah: - case INS_sqrdmlsh: - case INS_sqrdmulh: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - elemsize = optGetElemsize(opt); - assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); - fmt = 
IF_DV_3A; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert((size == EA_2BYTE) || (size == EA_4BYTE)); - fmt = IF_DV_3E; - } - break; - - case INS_sqdmlal2: - case INS_sqdmlsl2: - case INS_sqdmull2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_16BYTE); - assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); - fmt = IF_DV_3A; - break; - - case INS_pmul: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidArrangement(size, opt)); - assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B)); - fmt = IF_DV_3A; - break; - - case INS_pmull: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_1D)); - fmt = IF_DV_3A; - break; - - case INS_pmull2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_16BYTE); - assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_2D)); - fmt = IF_DV_3A; - break; - - case INS_sdot: - case INS_udot: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S))); - fmt = IF_DV_3A; - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_R_R(ins, attr, reg1, reg2, reg3, opt, sopt); - - } // end switch (ins) - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - - dispIns(id); - appendToCurIG(id); -} - -//----------------------------------------------------------------------------------- -// emitIns_R_R_R_I_LdStPair: Add an instruction storing 2 registers into a memory -// (pointed by reg3) and the offset (immediate). -// -// Arguments: -// ins - The instruction code -// attr - The emit attribute for register 1 -// attr2 - The emit attribute for register 2 -// reg1 - Register 1 -// reg2 - Register 2 -// reg3 - Register 3 -// imm - Immediate offset, prior to scaling by operand size -// varx1 - LclVar number 1 -// varx2 - LclVar number 2 -// offs1 - Memory offset of lclvar number 1 -// offs2 - Memory offset of lclvar number 2 -// -void emitter::emitIns_R_R_R_I_LdStPair(instruction ins, - emitAttr attr, - emitAttr attr2, - regNumber reg1, - regNumber reg2, - regNumber reg3, - ssize_t imm, - int varx1, - int varx2, - int offs1, - int offs2 DEBUG_ARG(unsigned var1RefsOffs) DEBUG_ARG(unsigned var2RefsOffs)) -{ - assert((ins == INS_stp) || (ins == INS_ldp)); - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - unsigned scale = 0; - - // Is the target a vector register? - if (isVectorRegister(reg1)) - { - assert(isValidVectorLSPDatasize(size)); - assert(isVectorRegister(reg2)); - - scale = NaturalScale_helper(size); - assert((scale >= 2) && (scale <= 4)); - } - else - { - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg2)); - scale = (size == EA_8BYTE) ? 
3 : 2; - } - - reg3 = encodingSPtoZR(reg3); - - fmt = IF_LS_3C; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - fmt = IF_LS_3B; - } - else - { - if ((imm & mask) == 0) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - // Unlike emitIns_S_S_R_R(), we would never come here when - // (imm & mask) != 0. - unreached(); - } - } - - bool validVar1 = varx1 != -1; - bool validVar2 = varx2 != -1; - - instrDesc* id; - - if (validVar1 && validVar2) - { - id = emitNewInstrLclVarPair(attr, imm); - id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); - id->idSetIsLclVar(); - - emitGetLclVarPairLclVar2(id)->initLclVarAddr(varx2, offs2); - } - else - { - id = emitNewInstrCns(attr, imm); - if (validVar1) - { - id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); - id->idSetIsLclVar(); - } - if (validVar2) - { - id->idAddr()->iiaLclVar.initLclVarAddr(varx2, offs2); - id->idSetIsLclVar(); - } - } - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - - // Record the attribute for the second register in the pair - if (EA_IS_GCREF(attr2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attr2)) - { - id->idGCrefReg2(GCT_BYREF); - } - else - { - id->idGCrefReg2(GCT_NONE); - } - -#ifdef DEBUG - id->idDebugOnlyInfo()->idVarRefOffs = var1RefsOffs; - id->idDebugOnlyInfo()->idVarRefOffs2 = var2RefsOffs; -#endif - dispIns(id); - appendToCurIG(id); -} - -/***************************************************************************** - * - * Add an instruction referencing three registers and a constant. 
- */ - -void emitter::emitIns_R_R_R_I(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - ssize_t imm, - insOpts opt /* = INS_OPTS_NONE */, - emitAttr attrReg2 /* = EA_UNKNOWN */, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) -{ - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool isLdSt = false; - bool isSIMD = false; - bool isAddSub = false; - bool setFlags = false; - unsigned scale = 0; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_extr: - assert(insOptsNone(opt)); - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidImmShift(imm, size)); - fmt = IF_DR_3E; - break; - - case INS_and: - case INS_ands: - case INS_eor: - case INS_orr: - case INS_bic: - case INS_bics: - case INS_eon: - case INS_orn: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isValidImmShift(imm, size)); - if (imm == 0) - { - assert(insOptsNone(opt)); // a zero imm, means no shift kind - fmt = IF_DR_3A; - } - else - { - assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind - fmt = IF_DR_3B; - } - break; - - case INS_fmul: // by element, imm[0..3] selects the element of reg3 - case INS_fmla: - case INS_fmls: - case INS_fmulx: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorElemsizeFloat(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - assert(opt != INS_OPTS_1D); // Reserved encoding - fmt = IF_DV_3BI; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - 
assert(isValidScalarDatasize(size)); - elemsize = size; - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_3DI; - } - break; - - case INS_mul: // by element, imm[0..7] selects the element of reg3 - case INS_mla: - case INS_mls: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - // Vector operation - assert(insOptsAnyArrangement(opt)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - elemsize = optGetElemsize(opt); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - // Only has encodings for H or S elemsize - assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); - // Only has encodings for V0..V15 - if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) - { - noway_assert(!"Invalid reg3"); - } - fmt = IF_DV_3AI; - break; - - case INS_add: - case INS_sub: - setFlags = false; - isAddSub = true; - break; - - case INS_adds: - case INS_subs: - setFlags = true; - isAddSub = true; - break; - - case INS_ldpsw: - scale = 2; - isLdSt = true; - break; - - case INS_ldnp: - case INS_stnp: - assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions - FALLTHROUGH; - - case INS_ldp: - case INS_stp: - // Is the target a vector register? - if (isVectorRegister(reg1)) - { - scale = NaturalScale_helper(size); - isSIMD = true; - } - else - { - scale = (size == EA_8BYTE) ? 
3 : 2; - } - isLdSt = true; - fmt = IF_LS_3C; - break; - - case INS_ld1: - case INS_ld2: - case INS_ld3: - case INS_ld4: - case INS_st1: - case INS_st2: - case INS_st3: - case INS_st4: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); - assert(isGeneralRegister(reg3)); - - assert(insOptsPostIndex(opt)); - - elemsize = size; - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - - // Load/Store single structure post-indexed by a register - reg2 = encodingSPtoZR(reg2); - fmt = IF_LS_3G; - break; - - case INS_ext: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isValidVectorDatasize(size)); - assert(isValidArrangement(size, opt)); - assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B)); - assert(isValidVectorIndex(size, EA_1BYTE, imm)); - fmt = IF_DV_3G; - break; - - case INS_smlal: - case INS_smlsl: - case INS_smull: - case INS_umlal: - case INS_umlsl: - case INS_umull: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); - elemsize = optGetElemsize(opt); - // Restricted to V0-V15 when element size is H. 
- if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) - { - assert(!"Invalid reg3"); - } - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - fmt = IF_DV_3AI; - break; - - case INS_sqdmlal: - case INS_sqdmlsl: - case INS_sqdmull: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(size == EA_8BYTE); - assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); - elemsize = optGetElemsize(opt); - fmt = IF_DV_3AI; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert((size == EA_2BYTE) || (size == EA_4BYTE)); - elemsize = size; - fmt = IF_DV_3EI; - } - // Restricted to V0-V15 when element size is H. - if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) - { - assert(!"Invalid reg3"); - } - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - break; - - case INS_sqdmulh: - case INS_sqrdmlah: - case INS_sqrdmlsh: - case INS_sqrdmulh: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - if (insOptsAnyArrangement(opt)) - { - // Vector operation - assert(isValidVectorDatasize(size)); - elemsize = optGetElemsize(opt); - assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); - fmt = IF_DV_3AI; - } - else - { - // Scalar operation - assert(insOptsNone(opt)); - assert((size == EA_2BYTE) || (size == EA_4BYTE)); - elemsize = size; - fmt = IF_DV_3EI; - } - // Restricted to V0-V15 when element size is H. 
- if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) - { - assert(!"Invalid reg3"); - } - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - break; - - case INS_smlal2: - case INS_smlsl2: - case INS_smull2: - case INS_sqdmlal2: - case INS_sqdmlsl2: - case INS_sqdmull2: - case INS_umlal2: - case INS_umlsl2: - case INS_umull2: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(size == EA_16BYTE); - assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); - elemsize = optGetElemsize(opt); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - // Restricted to V0-V15 when element size is H - if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) - { - assert(!"Invalid reg3"); - } - fmt = IF_DV_3AI; - break; - - case INS_sdot: - case INS_udot: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S))); - assert(isValidVectorIndex(EA_16BYTE, EA_4BYTE, imm)); - fmt = IF_DV_3AI; - break; - - default: - // fallback to emit SVE instructions. 
- return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, sopt); - - } // end switch (ins) - - assert(insScalableOptsNone(sopt)); + val = -val; + sign = 1; + } - if (isLdSt) + int exp = 0; + while ((val < 1.0) && (exp >= -4)) { - assert(!isAddSub); - assert(isGeneralRegisterOrSP(reg3)); - assert(insOptsNone(opt) || insOptsIndexed(opt)); - - if (isSIMD) - { - assert(isValidVectorLSPDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert((scale >= 2) && (scale <= 4)); - } - else - { - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert((scale == 2) || (scale == 3)); - } - - // Load/Store Pair reserved encodings: - if (emitInsIsLoad(ins)) - { - assert(reg1 != reg2); - } - if (insOptsIndexed(opt)) - { - assert(reg1 != reg3); - assert(reg2 != reg3); - } - - reg3 = encodingSPtoZR(reg3); + val *= 2.0; + exp--; + } + while ((val >= 2.0) && (exp <= 5)) + { + val *= 0.5; + exp++; + } + exp += 3; + val *= 16.0; + int ival = (int)val; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) + if ((exp >= 0) && (exp <= 7)) + { + if (val == (double)ival) { - assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero + canEncode = true; - fmt = IF_LS_3B; - } - else - { - if ((imm & mask) == 0) + if (wbFPI != nullptr) { - imm >>= scale; // The immediate is scaled by the size of the ld/st + ival -= 16; + assert((ival >= 0) && (ival <= 15)); - if ((imm >= -64) && (imm <= 63)) - { - fmt = IF_LS_3C; - } - } -#ifdef DEBUG - if (fmt != IF_LS_3C) - { - assert(!"Instruction cannot be encoded: IF_LS_3C"); + wbFPI->immSign = sign; + wbFPI->immExp = exp ^ 0x4; + wbFPI->immMant = ival; + unsigned imm8 = wbFPI->immFPIVal; + assert((imm8 >= 0) && (imm8 <= 0xff)); } -#endif } } - else if (isAddSub) - { - bool reg2IsSP = (reg2 == REG_SP); - assert(!isLdSt); - 
assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg3)); - if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option - { - assert(isGeneralRegisterOrZR(reg1)); - } - else - { - assert(isGeneralRegisterOrSP(reg1)); - reg1 = encodingSPtoZR(reg1); - } + return canEncode; +} - if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option - { - assert(isGeneralRegister(reg2)); - } - else - { - assert(isGeneralRegisterOrSP(reg2)); - reg2 = encodingSPtoZR(reg2); - } +/***************************************************************************** + * + * For the given 'ins' returns the reverse instruction + * if one exists, otherwise returns INS_INVALID + */ - if (insOptsAnyExtend(opt)) - { - assert((imm >= 0) && (imm <= 4)); +/*static*/ instruction emitter::insReverse(instruction ins) +{ + switch (ins) + { + case INS_add: + return INS_sub; + case INS_adds: + return INS_subs; - fmt = IF_DR_3C; - } - else if (insOptsAluShift(opt)) - { - // imm should be non-zero and in [1..63] - assert(isValidImmShift(imm, size) && (imm != 0)); - fmt = IF_DR_3B; - } - else if (imm == 0) - { - assert(insOptsNone(opt)); + case INS_sub: + return INS_add; + case INS_subs: + return INS_adds; - if (reg2IsSP) - { - // To encode the SP register as reg2 we must use the IF_DR_3C encoding - // and also specify a LSL of zero (imm == 0) - opt = INS_OPTS_LSL; - fmt = IF_DR_3C; - } - else - { - fmt = IF_DR_3A; - } - } - else - { - assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); - } - } + case INS_cmp: + return INS_cmn; + case INS_cmn: + return INS_cmp; - assert(fmt != IF_NONE); + case INS_ccmp: + return INS_ccmn; + case INS_ccmn: + return INS_ccmp; - instrDesc* id = emitNewInstrCns(attr, imm); + default: + return INS_invalid; + } +} - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); +/***************************************************************************** + * + * For the given 'datasize' and 'elemsize', make the 
proper arrangement option + * returns the insOpts that specifies the vector register arrangement + * if one does not exist returns INS_OPTS_NONE + */ - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); +/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize) +{ + insOpts result = INS_OPTS_NONE; - // Record the attribute for the second register in the pair - id->idGCrefReg2(GCT_NONE); - if (attrReg2 != EA_UNKNOWN) + if (datasize == EA_8BYTE) { - // Record the attribute for the second register in the pair - assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); - if (EA_IS_GCREF(attrReg2)) + switch (elemsize) { - id->idGCrefReg2(GCT_GCREF); + case EA_1BYTE: + result = INS_OPTS_8B; + break; + case EA_2BYTE: + result = INS_OPTS_4H; + break; + case EA_4BYTE: + result = INS_OPTS_2S; + break; + case EA_8BYTE: + result = INS_OPTS_1D; + break; + default: + unreached(); + break; } - else if (EA_IS_BYREF(attrReg2)) + } + else if (datasize == EA_16BYTE) + { + switch (elemsize) { - id->idGCrefReg2(GCT_BYREF); + case EA_1BYTE: + result = INS_OPTS_16B; + break; + case EA_2BYTE: + result = INS_OPTS_8H; + break; + case EA_4BYTE: + result = INS_OPTS_4S; + break; + case EA_8BYTE: + result = INS_OPTS_2D; + break; + default: + unreached(); + break; } } - - dispIns(id); - appendToCurIG(id); + return result; } /***************************************************************************** * - * Add an instruction referencing three registers and two constants. + * For the given 'datasize' and arrangement 'opts' + * returns true is the pair specifies a valid arrangement */ - -void emitter::emitIns_R_R_R_I_I(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - ssize_t imm1, - ssize_t imm2, - insOpts opt) +/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt) { - // Currently, only SVE instructions use this format. 
- emitInsSve_R_R_R_I_I(ins, attr, reg1, reg2, reg3, imm1, imm2, opt); + if (datasize == EA_8BYTE) + { + if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D)) + { + return true; + } + } + else if (datasize == EA_16BYTE) + { + if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D)) + { + return true; + } + } + return false; } -/***************************************************************************** - * - * Add an instruction referencing three registers, with an extend option - */ - -void emitter::emitIns_R_R_R_Ext(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - insOpts opt, /* = INS_OPTS_NONE */ - int shiftAmount) /* = -1 -- unset */ +//------------------------------------------------------------------------ +// insGetRegisterListSize: Returns a size of the register list a given instruction operates on. +// +// Arguments: +// ins - An instruction which uses a register list +// (e.g. ld1 (2 registers), ld1r, st1, tbl, tbx). +// +// Return value: +// A number of consecutive SIMD and floating-point registers the instruction loads to/store from. 
+// +/*static*/ unsigned emitter::insGetRegisterListSize(instruction ins) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - bool isSIMD = false; - int scale = -1; + unsigned registerListSize = 0; - /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_ldrb: - case INS_ldrsb: - case INS_strb: - scale = 0; + case INS_ld1: + case INS_ld1r: + case INS_st1: + case INS_tbl: + case INS_tbx: + registerListSize = 1; break; - case INS_ldrh: - case INS_ldrsh: - case INS_strh: - scale = 1; + case INS_ld1_2regs: + case INS_ld2: + case INS_ld2r: + case INS_st1_2regs: + case INS_st2: + case INS_tbl_2regs: + case INS_tbx_2regs: + registerListSize = 2; break; - case INS_ldrsw: - scale = 2; + case INS_ld1_3regs: + case INS_ld3: + case INS_ld3r: + case INS_st1_3regs: + case INS_st3: + case INS_tbl_3regs: + case INS_tbx_3regs: + registerListSize = 3; break; - case INS_ldr: - case INS_str: - // Is the target a vector register? - if (isVectorRegister(reg1)) - { - assert(isValidVectorLSDatasize(size)); - scale = NaturalScale_helper(size); - isSIMD = true; - } - else - { - assert(isValidGeneralDatasize(size)); - scale = (size == EA_8BYTE) ? 
3 : 2; - } - + case INS_ld1_4regs: + case INS_ld4: + case INS_ld4r: + case INS_st1_4regs: + case INS_st4: + case INS_tbl_4regs: + case INS_tbx_4regs: + registerListSize = 4; break; default: - unreached(); + assert(!"Unexpected instruction"); break; + } - } // end switch (ins) - - assert(scale != -1); - assert(insOptsLSExtend(opt)); + return registerListSize; +} - if (isSIMD) +// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement +// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed +// +/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement) +{ + if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) || + (arrangement == INS_OPTS_1D)) { - assert(isValidVectorLSDatasize(size)); - assert(isVectorRegister(reg1)); + return EA_8BYTE; + } + else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) || + (arrangement == INS_OPTS_2D)) + { + return EA_16BYTE; } else { - assert(isValidGeneralLSDatasize(size)); - assert(isGeneralRegisterOrZR(reg1)); + assert(!" invalid 'arrangement' value"); + return EA_UNKNOWN; } +} - assert(isGeneralRegisterOrSP(reg2)); - assert(isGeneralRegister(reg3)); - - // Load/Store reserved encodings: - if (insOptsIndexed(opt)) +// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement +// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed +// +/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement) +{ + if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B)) { - assert(reg1 != reg2); + return EA_1BYTE; } - - if (shiftAmount == -1) + else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H)) { - shiftAmount = insOptsLSL(opt) ? 
scale : 0; + return EA_2BYTE; } + else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S)) + { + return EA_4BYTE; + } + else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D)) + { + return EA_8BYTE; + } + else + { + assert(!" invalid 'arrangement' value"); + return EA_UNKNOWN; + } +} - assert((shiftAmount == scale) || (shiftAmount == 0)); - - reg2 = encodingSPtoZR(reg2); - fmt = IF_LS_3A; - - instrDesc* id = emitNewInstr(attr); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idReg3Scaled(shiftAmount == scale); - - dispIns(id); - appendToCurIG(id); +/*static*/ insOpts emitter::optWidenElemsizeArrangement(insOpts arrangement) +{ + if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B)) + { + return INS_OPTS_8H; + } + else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H)) + { + return INS_OPTS_4S; + } + else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S)) + { + return INS_OPTS_2D; + } + else + { + assert(!" invalid 'arrangement' value"); + return INS_OPTS_NONE; + } } -/***************************************************************************** - * - * Add an instruction referencing two registers and two constants. - */ +/*static*/ emitAttr emitter::widenDatasize(emitAttr datasize) +{ + if (datasize == EA_1BYTE) + { + return EA_2BYTE; + } + else if (datasize == EA_2BYTE) + { + return EA_4BYTE; + } + else if (datasize == EA_4BYTE) + { + return EA_8BYTE; + } + else + { + assert(!" 
invalid 'datasize' value"); + return EA_UNKNOWN; + } +} -void emitter::emitIns_R_R_I_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) +// For the given 'srcArrangement' returns the "widen" 'dstArrangement' specifying the destination vector register +// arrangement +// asserts and returns INS_OPTS_NONE if an invalid 'srcArrangement' value is passed +// +/*static*/ insOpts emitter::optWidenDstArrangement(insOpts srcArrangement) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc + insOpts dstArrangement = INS_OPTS_NONE; - /* Figure out the encoding format of the instruction */ - switch (ins) + switch (srcArrangement) { - int lsb; - int width; - bitMaskImm bmi; - unsigned registerListSize; + case INS_OPTS_8B: + dstArrangement = INS_OPTS_4H; + break; - case INS_bfm: - case INS_sbfm: - case INS_ubfm: - assert(isGeneralRegister(reg1)); - assert((ins == INS_bfm) ? 
isGeneralRegisterOrZR(reg2) : isGeneralRegister(reg2)); - assert(isValidImmShift(imm1, size)); - assert(isValidImmShift(imm2, size)); - assert(insOptsNone(opt)); - bmi.immNRS = 0; - bmi.immN = (size == EA_8BYTE); - bmi.immR = imm1; - bmi.immS = imm2; - immOut = bmi.immNRS; - fmt = IF_DI_2D; + case INS_OPTS_16B: + dstArrangement = INS_OPTS_8H; + break; + + case INS_OPTS_4H: + dstArrangement = INS_OPTS_2S; + break; + + case INS_OPTS_8H: + dstArrangement = INS_OPTS_4S; break; - case INS_bfi: - case INS_sbfiz: - case INS_ubfiz: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - lsb = getBitWidth(size) - imm1; - width = imm2 - 1; - assert(isValidImmShift(lsb, size)); - assert(isValidImmShift(width, size)); - assert(insOptsNone(opt)); - bmi.immNRS = 0; - bmi.immN = (size == EA_8BYTE); - bmi.immR = lsb; - bmi.immS = width; - immOut = bmi.immNRS; - fmt = IF_DI_2D; + case INS_OPTS_2S: + dstArrangement = INS_OPTS_1D; break; - case INS_bfxil: - case INS_sbfx: - case INS_ubfx: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - lsb = imm1; - width = imm2 + imm1 - 1; - assert(isValidImmShift(lsb, size)); - assert(isValidImmShift(width, size)); - assert(insOptsNone(opt)); - bmi.immNRS = 0; - bmi.immN = (size == EA_8BYTE); - bmi.immR = imm1; - bmi.immS = imm2 + imm1 - 1; - immOut = bmi.immNRS; - fmt = IF_DI_2D; + case INS_OPTS_4S: + dstArrangement = INS_OPTS_2D; break; - case INS_mov: - case INS_ins: - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - elemsize = size; - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2)); - assert(insOptsNone(opt)); - immOut = (imm1 << 4) + imm2; - fmt = IF_DV_2F; + default: + assert(!" 
invalid 'srcArrangement' value"); break; + } - case INS_ld1: - case INS_ld2: - case INS_ld3: - case INS_ld4: - case INS_st1: - case INS_st2: - case INS_st3: - case INS_st4: - assert(isVectorRegister(reg1)); - assert(isGeneralRegisterOrSP(reg2)); + return dstArrangement; +} - elemsize = size; - assert(isValidVectorElemsize(elemsize)); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1)); +// For the given 'conversion' returns the 'dstsize' specified by the conversion option +/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion) +{ + switch (conversion) + { + case INS_OPTS_S_TO_8BYTE: + case INS_OPTS_D_TO_8BYTE: + case INS_OPTS_4BYTE_TO_D: + case INS_OPTS_8BYTE_TO_D: + case INS_OPTS_S_TO_D: + case INS_OPTS_H_TO_D: - registerListSize = insGetRegisterListSize(ins); - assert((elemsize * registerListSize) == (unsigned)imm2); - assert(insOptsPostIndex(opt)); + return EA_8BYTE; - // Load/Store single structure post-indexed by an immediate - reg2 = encodingSPtoZR(reg2); - immOut = imm1; - fmt = IF_LS_2G; - break; + case INS_OPTS_S_TO_4BYTE: + case INS_OPTS_D_TO_4BYTE: + case INS_OPTS_4BYTE_TO_S: + case INS_OPTS_8BYTE_TO_S: + case INS_OPTS_D_TO_S: + case INS_OPTS_H_TO_S: + + return EA_4BYTE; + + case INS_OPTS_S_TO_H: + case INS_OPTS_D_TO_H: + + return EA_2BYTE; default: - unreached(); - break; + assert(!" 
invalid 'conversion' value"); + return EA_UNKNOWN; } - assert(fmt != IF_NONE); +} - instrDesc* id = emitNewInstrSC(attr, immOut); +// For the given 'conversion' returns the 'srcsize' specified by the conversion option +/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion) +{ + switch (conversion) + { + case INS_OPTS_D_TO_8BYTE: + case INS_OPTS_D_TO_4BYTE: + case INS_OPTS_8BYTE_TO_D: + case INS_OPTS_8BYTE_TO_S: + case INS_OPTS_D_TO_S: + case INS_OPTS_D_TO_H: - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); + return EA_8BYTE; - id->idReg1(reg1); - id->idReg2(reg2); + case INS_OPTS_S_TO_8BYTE: + case INS_OPTS_S_TO_4BYTE: + case INS_OPTS_4BYTE_TO_S: + case INS_OPTS_4BYTE_TO_D: + case INS_OPTS_S_TO_D: + case INS_OPTS_S_TO_H: - dispIns(id); - appendToCurIG(id); + return EA_4BYTE; + + case INS_OPTS_H_TO_S: + case INS_OPTS_H_TO_D: + + return EA_2BYTE; + + default: + assert(!" invalid 'conversion' value"); + return EA_UNKNOWN; + } +} + +// For the given 'size' and 'index' returns true if it specifies a valid index for a vector register of 'size' +/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index) +{ + assert(isValidVectorDatasize(datasize)); + assert(isValidVectorElemsize(elemsize)); + + bool result = false; + if (index >= 0) + { + if (datasize == EA_8BYTE) + { + switch (elemsize) + { + case EA_1BYTE: + result = (index < 8); + break; + case EA_2BYTE: + result = (index < 4); + break; + case EA_4BYTE: + result = (index < 2); + break; + case EA_8BYTE: + result = (index < 1); + break; + default: + unreached(); + break; + } + } + else if (datasize == EA_16BYTE) + { + switch (elemsize) + { + case EA_1BYTE: + result = (index < 16); + break; + case EA_2BYTE: + result = (index < 8); + break; + case EA_4BYTE: + result = (index < 4); + break; + case EA_8BYTE: + result = (index < 2); + break; + default: + unreached(); + break; + } + } + } + return result; } 
/***************************************************************************** * - * Add an instruction referencing four registers. + * Add an instruction with no operands. */ -void emitter::emitIns_R_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - insOpts opt /* = INS_OPTS_NONE*/, - insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +void emitter::emitIns(instruction ins) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; + instrDesc* id = emitNewInstrSmall(EA_8BYTE); + insFormat fmt = emitInsFormat(ins); - /* Figure out the encoding format of the instruction */ - switch (ins) + if (ins != INS_BREAKPOINT) { - case INS_madd: - case INS_msub: - case INS_smaddl: - case INS_smsubl: - case INS_umaddl: - case INS_umsubl: - assert(isValidGeneralDatasize(size)); - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - assert(isGeneralRegister(reg3)); - assert(isGeneralRegister(reg4)); - assert(insScalableOptsNone(sopt)); - fmt = IF_DR_4A; - break; - - case INS_fmadd: - case INS_fmsub: - case INS_fnmadd: - case INS_fnmsub: - // Scalar operation - assert(isValidScalarDatasize(size)); - assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(isVectorRegister(reg4)); - assert(insScalableOptsNone(sopt)); - fmt = IF_DV_4A; - break; - - case INS_invalid: - fmt = IF_NONE; - break; - - // Fallback handles emitting the SVE instructions. 
- default: - return emitInsSve_R_R_R_R(ins, attr, reg1, reg2, reg3, reg4, opt, sopt); + assert(fmt == IF_SN_0A); } - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(opt); - - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idReg4(reg4); dispIns(id); appendToCurIG(id); @@ -9443,59 +3663,36 @@ void emitter::emitIns_R_R_R_R(instruction ins, /***************************************************************************** * - * Add an instruction referencing four registers and a constant. - */ - -void emitter::emitIns_R_R_R_R_I(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - ssize_t imm, - insOpts opt /* = INS_OPT_NONE*/) -{ - // Currently, only SVE instructions use this format. - emitInsSve_R_R_R_R_I(ins, attr, reg1, reg2, reg3, reg4, imm, opt); -} - -/***************************************************************************** - * - * Add an instruction referencing a register and a condition code + * Add an instruction with a single immediate value. */ -void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond) +void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm) { - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ - switch (ins) + if (ins == INS_BREAKPOINT) { - case INS_cset: - case INS_csetm: - assert(isGeneralRegister(reg)); - cfi.cond = cond; - fmt = IF_DR_1D; - break; - - default: - unreached(); - break; - - } // end switch (ins) - + if ((imm & 0x0000ffff) == imm) + { + fmt = IF_SI_0A; + } + else + { + assert(!"Instruction cannot be encoded: IF_SI_0A"); + } + } + else + { + // fallback to emit SVE instructions. 
+ return emitInsSve_I(ins, attr, imm); + } assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + instrDesc* id = emitNewInstrSC(attr, imm); id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - id->idReg1(reg); dispIns(id); appendToCurIG(id); @@ -9503,43 +3700,44 @@ void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insC /***************************************************************************** * - * Add an instruction referencing two registers and a condition code + * Add an instruction referencing a single register. */ -void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond) +void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt /* = INS_OPTS_NONE */) { - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + insFormat fmt = IF_NONE; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_cinc: - case INS_cinv: - case INS_cneg: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - cfi.cond = cond; - fmt = IF_DR_2D; + case INS_br: + case INS_ret: + assert(isGeneralRegister(reg)); + fmt = IF_BR_1A; break; - default: - unreached(); + + case INS_dczva: + assert(isGeneralRegister(reg)); + assert(attr == EA_8BYTE); + fmt = IF_SR_1A; break; - } // end switch (ins) + case INS_mrs_tpid0: + fmt = IF_SR_1A; + break; + + default: + // fallback to emit SVE instructions. 
+ return emitInsSve_R(ins, attr, reg, opt); + } assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - id->idReg1(reg1); - id->idReg2(reg2); + id->idReg1(reg); dispIns(id); appendToCurIG(id); @@ -9547,701 +3745,1201 @@ void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, r /***************************************************************************** * - * Add an instruction referencing two registers and a condition code + * Add an instruction referencing a register and a constant. */ -void emitter::emitIns_R_R_R_COND( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond) +void emitter::emitIns_R_I(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t imm, + insOpts opt, /* = INS_OPTS_NONE */ + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */ + DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */)) { - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool canEncode = false; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_csel: - case INS_csinc: - case INS_csinv: - case INS_csneg: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(isGeneralRegisterOrZR(reg3)); - cfi.cond = cond; - fmt = IF_DR_3D; + bitMaskImm bmi; + halfwordImm hwi; + byteShiftedImm bsi; + ssize_t notOfImm; + + case INS_tst: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg)); + bmi.immNRS = 0; + canEncode = canEncodeBitMaskImm(imm, size, &bmi); + if (canEncode) + { + imm = bmi.immNRS; + assert(isValidImmNRS(imm, size)); + fmt = IF_DI_1C; + } break; - default: - unreached(); + case INS_movk: + case INS_movn: + case INS_movz: + 
assert(isValidGeneralDatasize(size)); + assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed) + assert(isGeneralRegister(reg)); + assert(isValidUimm<16>(imm)); + + hwi.immHW = 0; + hwi.immVal = imm; + assert(imm == emitDecodeHalfwordImm(hwi, size)); + + imm = hwi.immHWVal; + canEncode = true; + fmt = IF_DI_1B; break; - } // end switch (ins) + case INS_mov: + assert(isValidGeneralDatasize(size)); + assert(insOptsNone(opt)); // No explicit LSL here + // We will automatically determine the shift based upon the imm - assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); + // First try the standard 'halfword immediate' imm(i16,hw) + hwi.immHWVal = 0; + canEncode = canEncodeHalfwordImm(imm, size, &hwi); + if (canEncode) + { + // uses a movz encoding + assert(isGeneralRegister(reg)); + imm = hwi.immHWVal; + assert(isValidImmHWVal(imm, size)); + fmt = IF_DI_1B; + break; + } - instrDesc* id = emitNewInstr(attr); + // Next try the ones-complement form of 'halfword immediate' imm(i16,hw) + notOfImm = NOT_helper(imm, getBitWidth(size)); + canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi); + if (canEncode) + { + assert(isGeneralRegister(reg)); + imm = hwi.immHWVal; + ins = INS_movn; // uses a movn encoding + assert(isValidImmHWVal(imm, size)); + fmt = IF_DI_1B; + break; + } - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + // Finally try the 'bitmask immediate' imm(N,r,s) + bmi.immNRS = 0; + canEncode = canEncodeBitMaskImm(imm, size, &bmi); + if (canEncode) + { + assert(isGeneralRegisterOrSP(reg)); + reg = encodingSPtoZR(reg); + imm = bmi.immNRS; + assert(isValidImmNRS(imm, size)); + fmt = IF_DI_1D; + break; + } + else + { + assert(!"Instruction cannot be encoded: mov imm"); + } - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idSmallCns(cfi.immCFVal); + break; - dispIns(id); - appendToCurIG(id); -} + case INS_movi: + assert(isValidVectorDatasize(size)); + assert(isVectorRegister(reg)); + 
if (insOptsNone(opt) && (size == EA_8BYTE)) + { + opt = INS_OPTS_1D; + } + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + + if (elemsize == EA_8BYTE) + { + size_t uimm = imm; + ssize_t imm8 = 0; + unsigned pos = 0; + canEncode = true; + while (uimm != 0) + { + INT64 loByte = uimm & 0xFF; + if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8)) + { + if (loByte == 0xFF) + { + imm8 |= (ssize_t{1} << pos); + } + uimm >>= 8; + pos++; + } + else + { + canEncode = false; + break; + } + } + imm = imm8; + assert(isValidUimm<8>(imm)); + fmt = IF_DV_1B; + break; + } + else + { + // Vector operation -/***************************************************************************** - * - * Add an instruction referencing two registers the flags and a condition code - */ + // No explicit LSL/MSL is used for the immediate + // We will automatically determine the shift based upon the value of imm -void emitter::emitIns_R_R_FLAGS_COND( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + // First try the standard 'byteShifted immediate' imm(i8,bySh) + bsi.immBSVal = 0; + canEncode = canEncodeByteShiftedImm(imm, elemsize, true, &bsi); + if (canEncode) + { + imm = bsi.immBSVal; + assert(isValidImmBSVal(imm, size)); + fmt = IF_DV_1B; + break; + } - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_ccmp: - case INS_ccmn: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - cfi.flags = flags; - cfi.cond = cond; - fmt = IF_DR_2I; - break; - default: - unreached(); + // Next try the ones-complement form of the 'immediate' imm(i8,bySh) + if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms + { + notOfImm = NOT_helper(imm, getBitWidth(elemsize)); + canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi); + if (canEncode) + { + imm = bsi.immBSVal; + ins 
= INS_mvni; // uses a mvni encoding + assert(isValidImmBSVal(imm, size)); + fmt = IF_DV_1B; + break; + } + } + } break; - } // end switch (ins) - - assert(fmt != IF_NONE); - assert(isValidImmCondFlags(cfi.immCFVal)); - - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - id->idReg1(reg1); - id->idReg2(reg2); + case INS_orr: + case INS_bic: + case INS_mvni: + assert(isValidVectorDatasize(size)); + assert(isVectorRegister(reg)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms - dispIns(id); - appendToCurIG(id); -} + // Vector operation -/***************************************************************************** - * - * Add an instruction referencing a register, an immediate, the flags and a condition code - */ + // No explicit LSL/MSL is used for the immediate + // We will automatically determine the shift based upon the value of imm -void emitter::emitIns_R_I_FLAGS_COND( - instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insCflags flags, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + // First try the standard 'byteShifted immediate' imm(i8,bySh) + bsi.immBSVal = 0; + canEncode = canEncodeByteShiftedImm(imm, elemsize, + (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL) + &bsi); + if (canEncode) + { + imm = bsi.immBSVal; + assert(isValidImmBSVal(imm, size)); + fmt = IF_DV_1B; + break; + } + break; - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_ccmp: - case INS_ccmn: + case INS_cmp: + case INS_cmn: + assert(insOptsNone(opt)); assert(isGeneralRegister(reg)); - if (imm < 0) + + if (unsigned_abs(imm) <= 0x0fff) { - ins = insReverse(ins); - imm = -imm; + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + assert(isValidUimm<12>(imm)); + canEncode = true; + 
fmt = IF_DI_1A; } - if (isValidUimm<5>(imm)) + else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding { - cfi.imm5 = imm; - cfi.flags = flags; - cfi.cond = cond; - fmt = IF_DI_1F; + // Encoding will use a 12-bit left shift of the immediate + opt = INS_OPTS_LSL12; + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + assert((imm & 0xfff) == 0); + imm >>= 12; + assert(isValidUimm<12>(imm)); + canEncode = true; + fmt = IF_DI_1A; } else { - assert(!"Instruction cannot be encoded: ccmp/ccmn imm5"); + assert(!"Instruction cannot be encoded: IF_DI_1A"); } break; + default: - unreached(); - break; + // fallback to emit SVE instructions. + return emitInsSve_R_I(ins, attr, reg, imm, opt, sopt); } // end switch (ins) + assert(canEncode); assert(fmt != IF_NONE); - assert(isValidImmCondFlagsImm5(cfi.immCFVal)); - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + instrDesc* id = emitNewInstrSC(attr, imm); id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + id->idInsOpt(opt); id->idReg1(reg); +#ifdef DEBUG + id->idDebugOnlyInfo()->idMemCookie = targetHandle; + id->idDebugOnlyInfo()->idFlags = gtFlags; +#endif + dispIns(id); appendToCurIG(id); } /***************************************************************************** * - * Add a memory barrier instruction with a 'barrier' immediate + * Add an instruction referencing a register and a floating point constant. 
*/ -void emitter::emitIns_BARR(instruction ins, insBarrier barrier) +void emitter::emitIns_R_F( + instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */) + { - insFormat fmt = IF_NONE; - ssize_t imm = 0; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + ssize_t imm = 0; + bool canEncode = false; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_dsb: - case INS_dmb: - case INS_isb: + floatImm8 fpi; - fmt = IF_SI_0B; - imm = (ssize_t)barrier; - break; - default: - unreached(); + case INS_fcmp: + case INS_fcmpe: + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); + assert(isVectorRegister(reg)); + if (immDbl == 0.0) + { + canEncode = true; + fmt = IF_DV_1C; + } break; - } // end switch (ins) - assert(fmt != IF_NONE); + case INS_fmov: + assert(isVectorRegister(reg)); + fpi.immFPIVal = 0; + canEncode = canEncodeFloatImm8(immDbl, &fpi); - instrDesc* id = emitNewInstrSC(EA_8BYTE, imm); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + if (canEncode) + { + imm = fpi.immFPIVal; + assert((imm >= 0) && (imm <= 0xff)); + fmt = IF_DV_1B; + } + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); - dispIns(id); - appendToCurIG(id); -} + if (canEncode) + { + imm = fpi.immFPIVal; + assert((imm >= 0) && (imm <= 0xff)); + fmt = IF_DV_1A; + } + } + break; -/***************************************************************************** - * - * Add an instruction with a static data member operand. If 'size' is 0, the - * instruction operates on the address of the static member instead of its - * value (e.g. 
"push offset clsvar", rather than "push dword ptr [clsvar]"). - */ + default: + // fallback to emit SVE instructions. + return emitInsSve_R_F(ins, attr, reg, immDbl, opt); + + } // end switch (ins) + + assert(canEncode); + assert(fmt != IF_NONE); -void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) -{ - NYI("emitIns_C"); -} + instrDesc* id = emitNewInstrSC(attr, imm); -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable. - */ + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); -void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) -{ - NYI("emitIns_S"); + id->idReg1(reg); + + dispIns(id); + appendToCurIG(id); } -/***************************************************************************** - * - * Add an instruction referencing a register and a stack-based local variable. - */ -void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +//------------------------------------------------------------------------ +// emitIns_Mov: Emits a move instruction +// +// Arguments: +// ins -- The instruction being emitted +// attr -- The emit attribute +// dstReg -- The destination register +// srcReg -- The source register +// canSkip -- true if the move can be elided when dstReg == srcReg, otherwise false +// insOpts -- The instruction options +// +void emitter::emitIns_Mov( + instruction ins, emitAttr attr, regNumber dstReg, regNumber srcReg, bool canSkip, insOpts opt /* = INS_OPTS_NONE */) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - unsigned scale = 0; - bool isLdrStr = false; - bool isSimple = true; - bool useRegForImm = false; - - assert(offs >= 0); - - /* Figure out the variable's frame position */ - bool FPbased; - int base = emitComp->lvaFrameAddress(varx, &FPbased); - int disp = base + offs; - ssize_t imm = disp; + assert(IsMovInstruction(ins)); - 
regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; - // TODO-ARM64-CQ: use unscaled loads? /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_strb: - case INS_ldrb: - case INS_ldrsb: - scale = 0; - break; - - case INS_strh: - case INS_ldrh: - case INS_ldrsh: - scale = 1; - break; - - case INS_ldrsw: - scale = 2; - break; - - case INS_str: - case INS_ldr: - assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); - scale = genLog2(EA_SIZE_IN_BYTES(size)); - isLdrStr = true; - break; + case INS_mov: + { + assert(insOptsNone(opt)); - case INS_lea: - assert(size == EA_8BYTE); - isSimple = false; - scale = 0; + if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) + { + // These instructions have no side effect and can be skipped + return; + } - if (disp >= 0) + // Check for the 'mov' aliases for the vector registers + if (isVectorRegister(dstReg)) { - ins = INS_add; + if (isVectorRegister(srcReg) && isValidVectorDatasize(size)) + { + return emitIns_R_R_R(INS_mov, size, dstReg, srcReg, srcReg); + } + else + { + return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); + } } else { - ins = INS_sub; - imm = -disp; + if (isVectorRegister(srcReg)) + { + assert(isGeneralRegister(dstReg)); + return emitIns_R_R_I(INS_mov, size, dstReg, srcReg, 0); + } } - if (imm <= 0x0fff) + // Is this a MOV to/from SP instruction? 
+ if ((dstReg == REG_SP) || (srcReg == REG_SP)) { - fmt = IF_DI_2A; // add reg1,reg2,#disp + assert(isGeneralRegisterOrSP(dstReg)); + assert(isGeneralRegisterOrSP(srcReg)); + dstReg = encodingSPtoZR(dstReg); + srcReg = encodingSPtoZR(srcReg); + fmt = IF_DR_2G; } else { - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_DR_3A; // add reg1,reg2,rsvdReg + assert(insOptsNone(opt)); + assert(isGeneralRegister(dstReg)); + assert(isGeneralRegisterOrZR(srcReg)); + fmt = IF_DR_2E; } break; + } - case INS_sve_ldr: + case INS_sxtw: { - assert(isVectorRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_IE_2A; + assert((size == EA_8BYTE) || (size == EA_4BYTE)); + FALLTHROUGH; + } - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + case INS_sxtb: + case INS_sxth: + case INS_uxtb: + case INS_uxth: + { + if (canSkip && (dstReg == srcReg)) + { + // There are scenarios such as in genCallInstruction where the sign/zero extension should be elided + return; + } - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(dstReg)); + assert(isGeneralRegister(srcReg)); + fmt = IF_DR_2H; + break; + } + + case INS_fmov: + { + assert(isValidVectorElemsizeFloat(size)); + + if (canSkip && (dstReg == srcReg)) { - imm >>= scale; // The immediate is scaled by the size of the ld/st + // These instructions have no side effect and can be skipped + return; + } + + if (isVectorRegister(dstReg)) + { + if (isVectorRegister(srcReg)) + { + assert(insOptsNone(opt)); + fmt = IF_DV_2G; + } + else + { + assert(isGeneralRegister(srcReg)); + + // if the optional conversion specifier is not present we calculate it + if (opt == INS_OPTS_NONE) + { + opt = (size == EA_4BYTE) ? 
INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D; + } + assert(insOptsConvertIntToFloat(opt)); + + fmt = IF_DV_2I; + } } else { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + assert(isGeneralRegister(dstReg)); + assert(isVectorRegister(srcReg)); + + // if the optional conversion specifier is not present we calculate it + if (opt == INS_OPTS_NONE) + { + opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE; + } + assert(insOptsConvertFloatToInt(opt)); + + fmt = IF_DV_2H; } + break; } - break; - // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct - case INS_sve_ldr_mask: + default: { - assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_ID_2A; - ins = INS_sve_ldr; + unreached(); + } + } + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSmall(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(dstReg); + id->idReg2(srcReg); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing two registers + */ + +void emitter::emitIns_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + insOpts opt /* = INS_OPTS_NONE */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +{ + if (IsMovInstruction(ins)) + { + assert(!"Please use emitIns_Mov() to correctly handle move elision"); + emitIns_Mov(ins, attr, reg1, reg2, /* canSkip */ false, opt); + } + + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case 
INS_dup: + // Vector operation + assert(insOptsAnyArrangement(opt)); + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2C; + break; - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + case INS_abs: + case INS_not: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + if (ins == INS_not) { - imm >>= scale; // The immediate is scaled by the size of the ld/st + assert(isValidVectorDatasize(size)); + // Bitwise behavior is independent of element size, but is always encoded as 1 Byte + opt = optMakeArrangement(size, EA_1BYTE); + } + if (insOptsNone(opt)) + { + // Scalar operation + assert(size == EA_8BYTE); // Only type D is supported + fmt = IF_DV_2L; } else { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + // Vector operation + assert(insOptsAnyArrangement(opt)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + fmt = IF_DV_2M; } - } - break; - - default: - NYI("emitIns_R_S"); // FP locals? 
- return; + break; - } // end switch (ins) + case INS_mvn: + case INS_neg: + if (isVectorRegister(reg1)) + { + assert(isVectorRegister(reg2)); + if (ins == INS_mvn) + { + assert(isValidVectorDatasize(size)); + // Bitwise behavior is independent of element size, but is always encoded as 1 Byte + opt = optMakeArrangement(size, EA_1BYTE); + } + if (insOptsNone(opt)) + { + // Scalar operation + assert(size == EA_8BYTE); // Only type D is supported + fmt = IF_DV_2L; + } + else + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + fmt = IF_DV_2M; + } + break; + } + FALLTHROUGH; - assert((scale >= 0) && (scale <= 4)); + case INS_negs: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + fmt = IF_DR_2E; + break; - if (isSimple) - { - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + case INS_sxtl: + case INS_sxtl2: + case INS_uxtl: + case INS_uxtl2: + return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt); - if (imm == 0) - { - fmt = IF_LS_2A; - } - else if ((imm < 0) || ((imm & mask) != 0)) - { - if (isValidSimm<9>(imm)) + case INS_cls: + case INS_clz: + case INS_rbit: + case INS_rev16: + case INS_rev32: + case INS_cnt: + if (isVectorRegister(reg1)) { - fmt = IF_LS_2C; + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + if ((ins == INS_cls) || (ins == INS_clz)) + { + assert(elemsize != EA_8BYTE); // No encoding for type D + } + else if (ins == INS_rev32) + { + assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE)); + } + else + { + assert(elemsize == EA_1BYTE); // Only supports 8B or 16B + } + fmt = IF_DV_2M; + break; } - else + if (ins == INS_cnt) { - useRegForImm = true; + // Doesn't have general register version(s) + break; } - } - else if (imm > 0) - { - if (((imm & mask) == 0) && ((imm 
>> scale) < 0x1000)) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - fmt = IF_LS_2B; + FALLTHROUGH; + + case INS_rev: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + if (ins == INS_rev32) + { + assert(size == EA_8BYTE); } else { - useRegForImm = true; + assert(isValidGeneralDatasize(size)); } - } + fmt = IF_DR_2G; + break; - if (useRegForImm) - { - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_LS_3A; - } - } + case INS_addv: + case INS_saddlv: + case INS_smaxv: + case INS_sminv: + case INS_uaddlv: + case INS_umaxv: + case INS_uminv: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + assert((opt != INS_OPTS_2S) && (opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // Reserved encodings + fmt = IF_DV_2T; + break; - assert(fmt != IF_NONE); + case INS_rev64: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(elemsize != EA_8BYTE); // No encoding for type D + fmt = IF_DV_2M; + break; - // Try to optimize a load/store with an alternative instruction. - if (isLdrStr && emitComp->opts.OptimizationEnabled() && - OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) - { - return; - } + case INS_sqxtn: + case INS_sqxtun: + case INS_uqxtn: + if (insOptsNone(opt)) + { + // Scalar operation + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorElemsize(size)); + assert(size != EA_8BYTE); // The encoding size = 11 is reserved. 
+ fmt = IF_DV_2L; + break; + } + FALLTHROUGH; - instrDesc* id = emitNewInstrCns(attr, imm); + case INS_xtn: + // Vector operation + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(size == EA_8BYTE); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved + fmt = IF_DV_2M; + break; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + case INS_sqxtn2: + case INS_sqxtun2: + case INS_uqxtn2: + case INS_xtn2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(size == EA_16BYTE); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved + fmt = IF_DV_2M; + break; - id->idReg1(reg1); - id->idReg2(reg2); - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); - id->idSetIsLclVar(); + case INS_ldar: + case INS_ldapr: + case INS_ldaxr: + case INS_ldxr: + case INS_stlr: + assert(isValidGeneralDatasize(size)); -#ifdef DEBUG - id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; -#endif + FALLTHROUGH; - dispIns(id); - appendToCurIG(id); -} + case INS_ldarb: + case INS_ldaprb: + case INS_ldaxrb: + case INS_ldxrb: + case INS_ldarh: + case INS_ldaprh: + case INS_ldaxrh: + case INS_ldxrh: + case INS_stlrb: + case INS_stlrh: + assert(isValidGeneralLSDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrSP(reg2)); + assert(insOptsNone(opt)); -/***************************************************************************** - * - * Add an instruction referencing two register and consecutive stack-based local variable slots. 
- */ -void emitter::emitIns_R_R_S_S( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert((ins == INS_ldp) || (ins == INS_ldnp)); - assert(EA_8BYTE == EA_SIZE(attr1)); - assert(EA_8BYTE == EA_SIZE(attr2)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(offs >= 0); + reg2 = encodingSPtoZR(reg2); - insFormat fmt = IF_LS_3B; - int disp = 0; - const unsigned scale = 3; + fmt = IF_LS_2A; + break; - /* Figure out the variable's frame position */ - int base; - bool FPbased; + case INS_ldr: + case INS_ldrb: + case INS_ldrh: + case INS_ldrsb: + case INS_ldrsh: + case INS_ldrsw: + case INS_str: + case INS_strb: + case INS_strh: + case INS_cmn: + case INS_tst: + assert(insOptsNone(opt)); + emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE); + return; - base = emitComp->lvaFrameAddress(varx, &FPbased); - disp = base + offs; + case INS_cmp: + emitIns_R_R_I(ins, attr, reg1, reg2, 0, opt); + return; - // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? - regNumber reg3 = FPbased ? 
REG_FPBASE : REG_SPBASE; - reg3 = encodingSPtoZR(reg3); + case INS_staddb: + emitIns_R_R_R(INS_ldaddb, attr, reg1, REG_ZR, reg2); + return; + case INS_staddlb: + emitIns_R_R_R(INS_ldaddlb, attr, reg1, REG_ZR, reg2); + return; + case INS_staddh: + emitIns_R_R_R(INS_ldaddh, attr, reg1, REG_ZR, reg2); + return; + case INS_staddlh: + emitIns_R_R_R(INS_ldaddlh, attr, reg1, REG_ZR, reg2); + return; + case INS_stadd: + emitIns_R_R_R(INS_ldadd, attr, reg1, REG_ZR, reg2); + return; + case INS_staddl: + emitIns_R_R_R(INS_ldaddl, attr, reg1, REG_ZR, reg2); + return; - bool useRegForAdr = true; - ssize_t imm = disp; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - useRegForAdr = false; - } - else - { - if ((imm & mask) == 0) - { - ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st + case INS_fcmp: + case INS_fcmpe: + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2K; + break; - if ((immShift >= -64) && (immShift <= 63)) + case INS_fcvtns: + case INS_fcvtnu: + case INS_fcvtas: + case INS_fcvtau: + case INS_fcvtps: + case INS_fcvtpu: + case INS_fcvtms: + case INS_fcvtmu: + case INS_fcvtzs: + case INS_fcvtzu: + if (insOptsAnyArrangement(opt)) { - fmt = IF_LS_3C; - useRegForAdr = false; - imm = immShift; + // Vector operation + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2A; } - } - } - - if (useRegForAdr) - { - regNumber rsvd = codeGen->rsGetRsvdReg(); - emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm); - reg3 = rsvd; - imm = 0; - } - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrCns(attr1, imm); + else + { + 
// Scalar operation + assert(isVectorRegister(reg2)); + if (isVectorRegister(reg1)) + { + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); + fmt = IF_DV_2G; + } + else + { + assert(isGeneralRegister(reg1)); + assert(insOptsConvertFloatToInt(opt)); + assert(isValidVectorElemsizeFloat(size)); + fmt = IF_DV_2H; + } + } + break; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + case INS_fcvtl: + case INS_fcvtn: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); + fmt = IF_DV_2A; + break; - // Record the attribute for the second register in the pair - if (EA_IS_GCREF(attr2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attr2)) - { - id->idGCrefReg2(GCT_BYREF); - } - else - { - id->idGCrefReg2(GCT_NONE); - } + case INS_fcvtl2: + case INS_fcvtn2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(size == EA_16BYTE); + assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); + fmt = IF_DV_2A; + break; - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); - id->idSetIsLclVar(); + case INS_fcvtxn: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); -#ifdef DEBUG - id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; -#endif + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(size == EA_8BYTE); + assert(opt == INS_OPTS_2S); + fmt = IF_DV_2A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(size == EA_4BYTE); + fmt = IF_DV_2G; + } + break; - dispIns(id); - appendToCurIG(id); -} + case INS_fcvtxn2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_4S); + fmt = IF_DV_2A; + break; -/***************************************************************************** - * - * Add an instruction referencing a stack-based 
local variable and a register - */ -void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) -{ - assert(offs >= 0); - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - unsigned scale = 0; - bool isVectorStore = false; - bool isStr = false; - bool isSimple = true; - bool useRegForImm = false; + case INS_scvtf: + case INS_ucvtf: + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2A; + } + else + { + // Scalar operation + assert(isVectorRegister(reg1)); + if (isVectorRegister(reg2)) + { + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); + fmt = IF_DV_2G; + } + else + { + assert(isGeneralRegister(reg2)); + assert(insOptsConvertIntToFloat(opt)); + assert(isValidVectorElemsizeFloat(size)); + fmt = IF_DV_2I; + } + } + break; - /* Figure out the variable's frame position */ - bool FPbased; - int base = emitComp->lvaFrameAddress(varx, &FPbased); - int disp = base + offs; - ssize_t imm = disp; + case INS_fabs: + case INS_fneg: + case INS_fsqrt: + case INS_frinta: + case INS_frinti: + case INS_frintm: + case INS_frintn: + case INS_frintp: + case INS_frintx: + case INS_frintz: + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsizeFloat(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = 
IF_DV_2G; + } + break; - // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? - regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: + // Scalar operation + assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_2D))); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2Q; + break; - // TODO-ARM64-CQ: use unscaled loads? - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_strb: - scale = 0; - assert(isGeneralRegisterOrZR(reg1)); + case INS_fmaxnmv: + case INS_fmaxv: + case INS_fminnmv: + case INS_fminv: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_4S); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2R; break; - case INS_strh: - scale = 1; - assert(isGeneralRegisterOrZR(reg1)); + case INS_addp: + assert(size == EA_16BYTE); + assert(opt == INS_OPTS_2D); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2S; break; - case INS_str: - if (isGeneralRegisterOrZR(reg1)) + case INS_fcvt: + assert(insOptsConvertFloatToFloat(opt)); + assert(isValidVectorFcvtsize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2J; + break; + + case INS_cmeq: + case INS_cmge: + case INS_cmgt: + case INS_cmle: + case INS_cmlt: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + + if (insOptsAnyArrangement(opt)) { - assert(isValidGeneralDatasize(size)); - scale = (size == EA_8BYTE) ? 
3 : 2; + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + fmt = IF_DV_2M; } else { - assert(isVectorRegister(reg1)); - assert(isValidVectorLSDatasize(size)); - scale = NaturalScale_helper(size); - isVectorStore = true; + // Scalar operation + assert(size == EA_8BYTE); + assert(insOptsNone(opt)); + fmt = IF_DV_2L; } - isStr = true; break; - case INS_sve_str: - { + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_fcmle: + case INS_fcmlt: + case INS_frecpe: + case INS_frsqrte: assert(isVectorRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JH_2A; - - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + assert(isVectorRegister(reg2)); - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + if (insOptsAnyArrangement(opt)) { - imm >>= scale; // The immediate is scaled by the size of the ld/st + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); // Only Double/Float supported + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2A; } else { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + // Scalar operation + assert(isValidScalarDatasize(size)); // Only Double/Float supported + assert(insOptsNone(opt)); + fmt = IF_DV_2G; } - } - break; + break; - // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct - case INS_sve_str_mask: - { - assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JG_2A; - ins = INS_sve_str; + case INS_aesd: + case INS_aese: + case INS_aesmc: + case INS_aesimc: + 
assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + elemsize = optGetElemsize(opt); + assert(elemsize == EA_1BYTE); + fmt = IF_DV_2P; + break; - // TODO-SVE: Don't assume 128bit vectors - // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + case INS_sha1h: + assert(insOptsNone(opt)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + fmt = IF_DV_2U; + break; + + case INS_sha256su0: + case INS_sha1su1: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + elemsize = optGetElemsize(opt); + assert(elemsize == EA_4BYTE); + fmt = IF_DV_2P; + break; + + case INS_ld2: + case INS_ld3: + case INS_ld4: + case INS_st2: + case INS_st3: + case INS_st4: + assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 + FALLTHROUGH; + + case INS_ld1: + case INS_ld1_2regs: + case INS_ld1_3regs: + case INS_ld1_4regs: + case INS_st1: + case INS_st1_2regs: + case INS_st1_3regs: + case INS_st1_4regs: + case INS_ld1r: + case INS_ld2r: + case INS_ld3r: + case INS_ld4r: + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + + // Load/Store multiple structures base register + // Load single structure and replicate base register + reg2 = encodingSPtoZR(reg2); + fmt = IF_LS_2D; + break; + + case INS_urecpe: + case INS_ursqrte: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(elemsize == EA_4BYTE); + fmt = IF_DV_2A; + break; + + case INS_frecpx: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidScalarDatasize(size)); + assert(insOptsNone(opt)); + fmt = 
IF_DV_2G; + break; + + case INS_sadalp: + case INS_saddlp: + case INS_uadalp: + case INS_uaddlp: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidArrangement(size, opt)); + assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved + fmt = IF_DV_2T; + break; + + case INS_sqabs: + case INS_sqneg: + case INS_suqadd: + case INS_usqadd: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + if (insOptsAnyArrangement(opt)) { - imm >>= scale; // The immediate is scaled by the size of the ld/st + // Vector operation + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved + fmt = IF_DV_2M; } else { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + fmt = IF_DV_2L; } - } - break; + break; default: - NYI("emitIns_S_R"); // FP locals? - return; + // fallback to emit SVE instructions. + return emitInsSve_R_R(ins, attr, reg1, reg2, opt, sopt); } // end switch (ins) - if (isVectorStore || !isSimple) - { - assert(scale <= 4); - } - else - { - assert(scale <= 3); - } - - if (isSimple) - { - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - - if (imm == 0) - { - fmt = IF_LS_2A; - } - else if ((imm < 0) || ((imm & mask) != 0)) - { - if (isValidSimm<9>(imm)) - { - fmt = IF_LS_2C; - } - else - { - useRegForImm = true; - } - } - else if (imm > 0) - { - if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - fmt = IF_LS_2B; - } - else - { - useRegForImm = true; - } - } - - if (useRegForImm) - { - // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar. 
- // It is instead implicit when idSetIsLclVar() is set, with this encoding format. - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - fmt = IF_LS_3A; - } - } - assert(fmt != IF_NONE); - // Try to optimize a store with an alternative instruction. - if (isStr && emitComp->opts.OptimizationEnabled() && - OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) - { - return; - } - - instrDesc* id = emitNewInstrCns(attr, imm); + instrDesc* id = emitNewInstrSmall(attr); id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + id->idInsOpt(opt); id->idReg1(reg1); id->idReg2(reg2); - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); - id->idSetIsLclVar(); - -#ifdef DEBUG - id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; -#endif dispIns(id); appendToCurIG(id); @@ -10249,200 +4947,107 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va /***************************************************************************** * - * Add an instruction referencing consecutive stack-based local variable slots and two registers + * Add an instruction referencing a register and two constants. 
*/ -void emitter::emitIns_S_S_R_R( - instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) -{ - assert((ins == INS_stp) || (ins == INS_stnp)); - assert(EA_8BYTE == EA_SIZE(attr1)); - assert(EA_8BYTE == EA_SIZE(attr2)); - assert(isGeneralRegisterOrZR(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(offs >= 0); - - insFormat fmt = IF_LS_3B; - int disp = 0; - const unsigned scale = 3; - /* Figure out the variable's frame position */ - int base; - bool FPbased; +void emitter::emitIns_R_I_I(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t imm1, + ssize_t imm2, + insOpts opt /* = INS_OPTS_NONE */ + DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = 0 */)) +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc - base = emitComp->lvaFrameAddress(varx, &FPbased); - disp = base + offs; + /* Figure out the encoding format of the instruction */ + switch (ins) + { + bool canEncode; + halfwordImm hwi; - // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? - regNumber reg3 = FPbased ? 
REG_FPBASE : REG_SPBASE; + case INS_mov: + ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL + FALLTHROUGH; - bool useRegForAdr = true; - ssize_t imm = disp; - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (imm == 0) - { - useRegForAdr = false; - } - else - { - if ((imm & mask) == 0) - { - ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st + case INS_movk: + case INS_movn: + case INS_movz: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg)); + assert(isValidUimm<16>(imm1)); + assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL - if ((immShift >= -64) && (immShift <= 63)) + if (size == EA_8BYTE) { - fmt = IF_LS_3C; - useRegForAdr = false; - imm = immShift; + assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48 + (imm2 == 32) || (imm2 == 48)); + } + else // EA_4BYTE + { + assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16 } - } - } - - if (useRegForAdr) - { - regNumber rsvd = codeGen->rsGetRsvdReg(); - emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm); - reg3 = rsvd; - imm = 0; - } - - assert(fmt != IF_NONE); - - instrDesc* id = emitNewInstrCns(attr1, imm); - - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - - // Record the attribute for the second register in the pair - if (EA_IS_GCREF(attr2)) - { - id->idGCrefReg2(GCT_GCREF); - } - else if (EA_IS_BYREF(attr2)) - { - id->idGCrefReg2(GCT_BYREF); - } - else - { - id->idGCrefReg2(GCT_NONE); - } - - reg3 = encodingSPtoZR(reg3); - - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); - id->idSetIsLclVar(); - -#ifdef DEBUG - id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; -#endif - dispIns(id); - appendToCurIG(id); -} + hwi.immHWVal = 0; -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable 
and an immediate - */ -void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) -{ - NYI("emitIns_S_I"); -} + switch (imm2) + { + case 0: + hwi.immHW = 0; + canEncode = true; + break; -/***************************************************************************** - * - * Add an instruction with a register + static member operands. - * Constant is stored into JIT data which is adjacent to code. - * No relocation is needed. PC-relative offset will be encoded directly into instruction. - * - */ -void emitter::emitIns_R_C( - instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) -{ - assert(offs >= 0); - assert(instrDesc::fitsInSmallCns(offs)); + case 16: + hwi.immHW = 1; + canEncode = true; + break; - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - instrDescJmp* id = emitNewInstrJmp(); + case 32: + hwi.immHW = 2; + canEncode = true; + break; - switch (ins) - { - case INS_adr: - // This is case to get address to the constant data. - fmt = IF_LARGEADR; - assert(isGeneralRegister(reg)); - assert(isValidGeneralDatasize(size)); - break; + case 48: + hwi.immHW = 3; + canEncode = true; + break; - case INS_ldr: - fmt = IF_LARGELDC; - if (isVectorRegister(reg)) - { - assert(isValidVectorLSDatasize(size)); - // For vector (float/double) register, we should have an integer address reg to - // compute long address which consists of page address and page offset. - // For integer constant, this is not needed since the dest reg can be used to - // compute address as well as contain the final contents. - assert(isGeneralRegister(reg) || (addrReg != REG_NA)); + default: + canEncode = false; } - else + + if (canEncode) { - assert(isGeneralRegister(reg)); - assert(isValidGeneralDatasize(size)); + hwi.immVal = imm1; + + immOut = hwi.immHWVal; + assert(isValidImmHWVal(immOut, size)); + fmt = IF_DI_1B; } break; default: - unreached(); - } + // fallback to emit SVE instructions. 
+ return emitInsSve_R_I_I(ins, attr, reg, imm1, imm2, opt); + + } // end switch (ins) assert(fmt != IF_NONE); + instrDesc* id = emitNewInstrSC(attr, immOut); + id->idIns(ins); id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - id->idSmallCns(offs); - id->idOpSize(size); - id->idAddr()->iiaFieldHnd = fldHnd; - id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are - // allocated together. - - id->idReg1(reg); // destination register that will get the constant value. - if (addrReg != REG_NA) - { - id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long - // address) - } - id->idjShort = false; // Assume loading constant from long address + id->idInsOpt(opt); - // Keep it long if it's in cold code. - id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB); + id->idReg1(reg); #ifdef DEBUG - if (emitComp->opts.compLongAddress) - id->idjKeepLong = 1; -#endif // DEBUG - - // If it's possible to be shortened, then put it in jump list - // to be revisited by emitJumpDistBind. - if (!id->idjKeepLong) - { - /* Record the jump's IG and offset within it */ - id->idjIG = emitCurIG; - id->idjOffs = emitCurIGsize; - - /* Append this jump to this IG's jump list */ - id->idjNext = emitCurIGjmpList; - emitCurIGjmpList = id; - -#if EMITTER_STATS - emitTotalIGjmps++; + id->idDebugOnlyInfo()->idFlags = gtFlags; + id->idDebugOnlyInfo()->idMemCookie = targetHandle; #endif - } dispIns(id); appendToCurIG(id); @@ -10450,10523 +5055,9772 @@ void emitter::emitIns_R_C( /***************************************************************************** * - * Add an instruction with a static member + constant. 
- */ - -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) -{ - NYI("emitIns_C_I"); -} - -/***************************************************************************** - * - * Add an instruction with a static member + register operands. + * Add an instruction referencing two registers and a constant. */ -void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) -{ - assert(!"emitIns_C_R not supported for RyuJIT backend"); -} - -void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) -{ - NYI("emitIns_R_AR"); -} - -// This generates code to populate the access for TLS on linux -void emitter::emitIns_Adrp_Ldr_Add(emitAttr attr, - regNumber reg1, - regNumber reg2, - ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +void emitter::emitIns_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - assert(emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)); - assert(TargetOS::IsUnix); - assert(EA_IS_RELOC(attr)); - assert(EA_IS_CNS_TLSGD_RELOC(attr)); - - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_DI_1E; - bool needAdd = false; - instrDescJmp* id = emitNewInstrJmp(); - - // adrp - id->idIns(INS_adrp); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - id->idOpSize(size); - id->idAddr()->iiaAddr = (BYTE*)addr; - id->idReg1(reg1); - id->idSetIsDspReloc(); - id->idSetTlsGD(); + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool isLdSt = false; + bool isLdrStr = false; + bool isSIMD = false; + bool isAddSub = false; + bool setFlags = false; + unsigned scale = 0; + bool unscaledOp = false; -#ifdef DEBUG - id->idDebugOnlyInfo()->idMemCookie = targetHandle; - id->idDebugOnlyInfo()->idFlags = gtFlags; -#endif + /* 
Figure out the encoding format of the instruction */ + switch (ins) + { + bool canEncode; + bitMaskImm bmi; + unsigned registerListSize; + bool isRightShift; - dispIns(id); - appendToCurIG(id); + case INS_mov: + // Check for the 'mov' aliases for the vector registers + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + elemsize = size; + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - // ldr - emitIns_R_R_I(INS_ldr, attr, reg2, reg1, (ssize_t)addr); + if (isVectorRegister(reg1)) + { + if (isGeneralRegisterOrZR(reg2)) + { + fmt = IF_DV_2C; // Alias for 'ins' + break; + } + else if (isVectorRegister(reg2)) + { + fmt = IF_DV_2E; // Alias for 'dup' + break; + } + } + else // isGeneralRegister(reg1) + { + assert(isGeneralRegister(reg1)); + if (isVectorRegister(reg2)) + { + fmt = IF_DV_2B; // Alias for 'umov' + break; + } + } + assert(!" invalid INS_mov operands"); + break; - // add - fmt = IF_DI_2A; - instrDesc* addId = emitNewInstr(attr); - assert(id->idIsReloc()); + case INS_lsl: + case INS_lsr: + case INS_asr: + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isValidImmShift(imm, size)); + fmt = IF_DI_2D; + break; - addId->idIns(INS_add); - addId->idInsFmt(fmt); - addId->idInsOpt(INS_OPTS_NONE); - addId->idOpSize(size); - addId->idAddr()->iiaAddr = (BYTE*)addr; - addId->idReg1(reg1); - addId->idReg2(reg1); - addId->idSetTlsGD(); + case INS_ror: + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isValidImmShift(imm, size)); + fmt = IF_DI_2B; + break; - dispIns(addId); - appendToCurIG(addId); -} + case INS_shl: + case INS_sli: + case INS_sri: + case INS_srshr: + case INS_srsra: + case INS_sshr: + case INS_ssra: + case INS_urshr: + case INS_ursra: + case INS_ushr: + case INS_usra: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + 
isRightShift = emitInsIsVectorRightShift(ins); -// This computes address from the immediate which is relocatable. -void emitter::emitIns_R_AI(instruction ins, - emitAttr attr, - regNumber ireg, - ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) -{ - assert(EA_IS_RELOC(attr)); - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_DI_1E; - bool needAdd = false; - instrDescJmp* id = emitNewInstrJmp(); + assert(!isRightShift || + (imm != 0 && "instructions for vector right-shift do not allow zero as an immediate value")); - switch (ins) - { - case INS_adrp: - // This computes page address. - // page offset is needed using add. - needAdd = true; + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2O; + break; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(size == EA_8BYTE); // only supported size + assert(isValidVectorShiftAmount(imm, size, isRightShift)); + fmt = IF_DV_2N; + } break; - case INS_adr: + + case INS_sqshl: + case INS_uqshl: + case INS_sqshlu: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + isRightShift = emitInsIsVectorRightShift(ins); + + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding immh = 1xxx, Q = 0 is reserved + elemsize = optGetElemsize(opt); + assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); + fmt = IF_DV_2O; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + assert(isValidVectorShiftAmount(imm, size, isRightShift)); + fmt = IF_DV_2N; + } break; - default: - unreached(); - } - id->idIns(ins); - id->idInsFmt(fmt); - 
id->idInsOpt(INS_OPTS_NONE); - id->idOpSize(size); - id->idAddr()->iiaAddr = (BYTE*)addr; - id->idReg1(ireg); - id->idSetIsDspReloc(); -#ifdef DEBUG - id->idDebugOnlyInfo()->idMemCookie = targetHandle; - id->idDebugOnlyInfo()->idFlags = gtFlags; -#endif + case INS_sqrshrn: + case INS_sqrshrun: + case INS_sqshrn: + case INS_sqshrun: + case INS_uqrshrn: + case INS_uqshrn: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + isRightShift = emitInsIsVectorRightShift(ins); - dispIns(id); - appendToCurIG(id); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidArrangement(size, opt)); + assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding immh = 1xxx, Q = x is reserved + elemsize = optGetElemsize(opt); + assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); + fmt = IF_DV_2O; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + assert(size != EA_8BYTE); // The encoding immh = 1xxx is reserved + assert(isValidVectorShiftAmount(imm, size, isRightShift)); + fmt = IF_DV_2N; + } + break; - if (needAdd) - { - // add reg, reg, imm - ins = INS_add; - fmt = IF_DI_2A; - instrDesc* id = emitNewInstr(attr); - assert(id->idIsReloc()); + case INS_sxtl: + case INS_uxtl: + assert(imm == 0); + FALLTHROUGH; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); - id->idOpSize(size); - id->idAddr()->iiaAddr = (BYTE*)addr; - id->idReg1(ireg); - id->idReg2(ireg); + case INS_rshrn: + case INS_shrn: + case INS_sshll: + case INS_ushll: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + isRightShift = emitInsIsVectorRightShift(ins); + // Vector operation + assert(size == EA_8BYTE); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(elemsize != EA_8BYTE); // Reserved encodings + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); + fmt = IF_DV_2O; + break; - 
dispIns(id); - appendToCurIG(id); - } -} + case INS_sxtl2: + case INS_uxtl2: + assert(imm == 0); + FALLTHROUGH; -void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) -{ - NYI("emitIns_AR_R"); -} + case INS_rshrn2: + case INS_shrn2: + case INS_sqrshrn2: + case INS_sqrshrun2: + case INS_sqshrn2: + case INS_sqshrun2: + case INS_sshll2: + case INS_uqrshrn2: + case INS_uqshrn2: + case INS_ushll2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + isRightShift = emitInsIsVectorRightShift(ins); -void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - NYI("emitIns_R_ARR"); -} + // Vector operation + assert(size == EA_16BYTE); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(elemsize != EA_8BYTE); // The encoding immh = 1xxx, Q = x is reserved + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorShiftAmount(imm, elemsize, isRightShift)); + fmt = IF_DV_2O; + break; -void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) -{ - NYI("emitIns_R_ARR"); -} + case INS_mvn: + case INS_neg: + case INS_negs: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); -void emitter::emitIns_R_ARX( - instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) -{ - NYI("emitIns_R_ARR"); -} + if (imm == 0) + { + assert(insOptsNone(opt)); // a zero imm, means no alu shift kind -/***************************************************************************** - * - * Record that a jump instruction uses the short encoding - * - */ -void emitter::emitSetShortJump(instrDescJmp* id) -{ - if (id->idjKeepLong) - return; + fmt = IF_DR_2E; + } + else + { + if (ins == INS_mvn) + { + assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind + } + else // neg or 
negs + { + assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR + } + assert(isValidImmShift(imm, size)); + fmt = IF_DR_2F; + } + break; - insFormat fmt = IF_NONE; - if (emitIsCondJump(id)) - { - switch (id->idIns()) - { - case INS_cbz: - case INS_cbnz: - fmt = IF_BI_1A; - break; - case INS_tbz: - case INS_tbnz: - fmt = IF_BI_1B; - break; - default: - fmt = IF_BI_0B; - break; - } - } - else if (emitIsLoadLabel(id)) - { - fmt = IF_DI_1E; - } - else if (emitIsLoadConstant(id)) - { - fmt = IF_LS_1A; - } - else - { - unreached(); - } + case INS_tst: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegister(reg2)); - id->idInsFmt(fmt); - id->idjShort = true; -} + if (insOptsAnyShift(opt)) + { + assert(isValidImmShift(imm, size) && (imm != 0)); + fmt = IF_DR_2B; + } + else + { + assert(insOptsNone(opt)); // a zero imm, means no alu shift kind + assert(imm == 0); + fmt = IF_DR_2A; + } + break; -/***************************************************************************** - * - * Add a label instruction. 
- */ + case INS_cmp: + case INS_cmn: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrSP(reg1)); + assert(isGeneralRegister(reg2)); + + reg1 = encodingSPtoZR(reg1); + if (insOptsAnyExtend(opt)) + { + assert((imm >= 0) && (imm <= 4)); -void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) -{ - assert(dst->HasFlag(BBF_HAS_LABEL)); + fmt = IF_DR_2C; + } + else if (imm == 0) + { + assert(insOptsNone(opt)); // a zero imm, means no alu shift kind - insFormat fmt = IF_NONE; + fmt = IF_DR_2A; + } + else + { + assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind + assert(isValidImmShift(imm, size)); + fmt = IF_DR_2B; + } + break; - switch (ins) - { - case INS_adr: - fmt = IF_LARGEADR; + case INS_ands: + case INS_and: + case INS_eor: + case INS_orr: + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg2)); + if (ins == INS_ands) + { + assert(isGeneralRegister(reg1)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + reg1 = encodingSPtoZR(reg1); + } + + bmi.immNRS = 0; + canEncode = canEncodeBitMaskImm(imm, size, &bmi); + if (canEncode) + { + imm = bmi.immNRS; + assert(isValidImmNRS(imm, size)); + fmt = IF_DI_2C; + } break; - default: - unreached(); - } - instrDescJmp* id = emitNewInstrJmp(); + case INS_dup: // by element, imm selects the element of reg2 + assert(isVectorRegister(reg1)); + if (isVectorRegister(reg2)) + { + if (insOptsAnyArrangement(opt)) + { + // The size and opt were modified to be based on the + // return type but the immediate is based on the operand + // which can be of a larger size. As such, we don't + // assert the index is valid here and instead do it in + // codegen. 
- id->idIns(ins); - id->idInsFmt(fmt); - id->idjShort = false; - id->idAddr()->iiaBBlabel = dst; - id->idReg1(reg); - id->idOpSize(EA_PTRSIZE); + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsize(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_2D; + break; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + elemsize = size; + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_2E; + break; + } + } + FALLTHROUGH; -#ifdef DEBUG - // Mark the catch return - if (emitComp->compCurBB->KindIs(BBJ_EHCATCHRET)) - { - id->idDebugOnlyInfo()->idCatchRet = true; - } -#endif // DEBUG + case INS_ins: // (MOV from general) + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + elemsize = size; + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_2C; + break; - id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); + case INS_umov: // (MOV to general) + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + assert(isGeneralRegister(reg1)); + assert(isVectorRegister(reg2)); + elemsize = size; + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_2B; + break; -#ifdef DEBUG - if (emitComp->opts.compLongAddress) - id->idjKeepLong = 1; -#endif // DEBUG + case INS_smov: + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + assert(size != EA_8BYTE); // no encoding, use INS_umov + assert(isGeneralRegister(reg1)); + assert(isVectorRegister(reg2)); + elemsize = size; + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_2B; + break; - /* Record the jump's IG and offset within it */ + case INS_add: + case INS_sub: + setFlags = false; + isAddSub = true; + break; - id->idjIG = emitCurIG; - 
id->idjOffs = emitCurIGsize; + case INS_adds: + case INS_subs: + setFlags = true; + isAddSub = true; + break; - /* Append this jump to this IG's jump list */ + case INS_ldrsb: + case INS_ldursb: + // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register + assert(isValidGeneralDatasize(size)); + unscaledOp = (ins == INS_ldursb); + scale = 0; + isLdSt = true; + break; - id->idjNext = emitCurIGjmpList; - emitCurIGjmpList = id; + case INS_ldrsh: + case INS_ldursh: + // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register + assert(isValidGeneralDatasize(size)); + unscaledOp = (ins == INS_ldursh); + scale = 1; + isLdSt = true; + break; -#if EMITTER_STATS - emitTotalIGjmps++; -#endif + case INS_ldrsw: + case INS_ldursw: + // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register + assert(size == EA_8BYTE); + unscaledOp = (ins == INS_ldursw); + scale = 2; + isLdSt = true; + break; - dispIns(id); - appendToCurIG(id); -} + case INS_ldrb: + case INS_strb: + // size is ignored + unscaledOp = false; + scale = 0; + isLdSt = true; + break; -/***************************************************************************** - * - * Add a data label instruction. 
- */ + case INS_ldapurb: + case INS_stlurb: + case INS_ldurb: + case INS_sturb: + // size is ignored + unscaledOp = true; + scale = 0; + isLdSt = true; + break; -void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) -{ - NYI("emitIns_R_D"); -} + case INS_ldrh: + case INS_strh: + // size is ignored + unscaledOp = false; + scale = 1; + isLdSt = true; + break; -void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) -{ - assert((ins == INS_cbz) || (ins == INS_cbnz)); + case INS_ldurh: + case INS_ldapurh: + case INS_sturh: + case INS_stlurh: + // size is ignored + unscaledOp = true; + scale = 0; + isLdSt = true; + break; - assert(dst != nullptr); - assert(dst->HasFlag(BBF_HAS_LABEL)); + case INS_ldr: + case INS_str: + // Is the target a vector register? + if (isVectorRegister(reg1)) + { + assert(isValidVectorLSDatasize(size)); + assert(isGeneralRegisterOrSP(reg2)); + isSIMD = true; + } + else + { + assert(isValidGeneralDatasize(size)); + } + unscaledOp = false; + scale = NaturalScale_helper(size); + isLdSt = true; + isLdrStr = true; + break; - insFormat fmt = IF_LARGEJMP; + case INS_ldur: + case INS_stur: + case INS_ldapur: + case INS_stlur: + // Is the target a vector register? 
+ if (isVectorRegister(reg1)) + { + assert(isValidVectorLSDatasize(size)); + assert(isGeneralRegisterOrSP(reg2)); + isSIMD = true; + } + else + { + assert(isValidGeneralDatasize(size)); + } + unscaledOp = true; + scale = 0; + isLdSt = true; + break; - instrDescJmp* id = emitNewInstrJmp(); + case INS_ld2: + case INS_ld3: + case INS_ld4: + case INS_st2: + case INS_st3: + case INS_st4: + assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 + FALLTHROUGH; - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(reg); - id->idjShort = false; - id->idOpSize(EA_SIZE(attr)); + case INS_ld1: + case INS_ld1_2regs: + case INS_ld1_3regs: + case INS_ld1_4regs: + case INS_st1: + case INS_st1_2regs: + case INS_st1_3regs: + case INS_st1_4regs: + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); - id->idAddr()->iiaBBlabel = dst; - id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); + reg2 = encodingSPtoZR(reg2); - /* Record the jump's IG and offset within it */ + if (insOptsAnyArrangement(opt)) + { + registerListSize = insGetRegisterListSize(ins); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + assert((size * registerListSize) == imm); - id->idjIG = emitCurIG; - id->idjOffs = emitCurIGsize; + // Load/Store multiple structures post-indexed by an immediate + fmt = IF_LS_2E; + } + else + { + assert(insOptsNone(opt)); + assert((ins != INS_ld1_2regs) && (ins != INS_ld1_3regs) && (ins != INS_ld1_4regs) && + (ins != INS_st1_2regs) && (ins != INS_st1_3regs) && (ins != INS_st1_4regs)); - /* Append this jump to this IG's jump list */ + elemsize = size; + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - id->idjNext = emitCurIGjmpList; - emitCurIGjmpList = id; + // Load/Store single structure base register + fmt = IF_LS_2F; + } + break; -#if EMITTER_STATS - emitTotalIGjmps++; -#endif + case INS_ld1r: + case INS_ld2r: + case INS_ld3r: + case INS_ld4r: + 
assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); - dispIns(id); - appendToCurIG(id); -} + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); -void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) -{ - assert((ins == INS_tbz) || (ins == INS_tbnz)); + elemsize = optGetElemsize(opt); + registerListSize = insGetRegisterListSize(ins); + assert((elemsize * registerListSize) == imm); - assert(dst != nullptr); - assert(dst->HasFlag(BBF_HAS_LABEL)); - assert((EA_SIZE(attr) == EA_4BYTE) || (EA_SIZE(attr) == EA_8BYTE)); - assert(imm < ((EA_SIZE(attr) == EA_4BYTE) ? 32 : 64)); + // Load single structure and replicate post-indexed by an immediate + reg2 = encodingSPtoZR(reg2); + fmt = IF_LS_2E; + break; - insFormat fmt = IF_LARGEJMP; + default: + // fallback to emit SVE instructions. + return emitInsSve_R_R_I(ins, attr, reg1, reg2, imm, opt, sopt); - instrDescJmp* id = emitNewInstrJmp(); + } // end switch (ins) - id->idIns(ins); - id->idInsFmt(fmt); - id->idReg1(reg); - id->idjShort = false; - id->idSmallCns(imm); - id->idOpSize(EA_SIZE(attr)); + if (isLdSt) + { + assert(!isAddSub); - id->idAddr()->iiaBBlabel = dst; - id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); + if (isSIMD) + { + assert(isValidVectorLSDatasize(size)); + assert(isVectorRegister(reg1)); + assert((scale >= 0) && (scale <= 4)); + } + else + { + assert(isValidGeneralLSDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert((scale >= 0) && (scale <= 3)); + } - /* Record the jump's IG and offset within it */ + assert(isGeneralRegisterOrSP(reg2)); - id->idjIG = emitCurIG; - id->idjOffs = emitCurIGsize; + // Load/Store reserved encodings: + if (insOptsIndexed(opt)) + { + assert(reg1 != reg2); + } - /* Append this jump to this IG's jump list */ + reg2 = encodingSPtoZR(reg2); - id->idjNext = emitCurIGjmpList; - emitCurIGjmpList = id; + ssize_t mask = (1 << scale) - 1; // the mask of low 
bits that must be zero to encode the immediate + if (imm == 0 || EA_IS_CNS_TLSGD_RELOC(attr)) + { + assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero -#if EMITTER_STATS - emitTotalIGjmps++; -#endif + fmt = IF_LS_2A; + } + else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0)) + { + if (isValidSimm<9>(imm)) + { + fmt = IF_LS_2C; + } + else + { + assert(!"Instruction cannot be encoded: IF_LS_2C"); + } + } + else if (imm > 0) + { + assert(insOptsNone(opt)); + assert(!unscaledOp); - dispIns(id); - appendToCurIG(id); -} + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st -void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) -{ - insFormat fmt = IF_NONE; + fmt = IF_LS_2B; + } + else + { + assert(!"Instruction cannot be encoded: IF_LS_2B"); + } + } - if (dst != nullptr) - { - assert(dst->HasFlag(BBF_HAS_LABEL)); + // Try to optimize a load/store with an alternative instruction. + if (isLdrStr && emitComp->opts.OptimizationEnabled() && + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, false, -1, -1 DEBUG_ARG(false))) + { + return; + } } - else + else if (isAddSub) { - assert(instrCount != 0); - } + assert(!isLdSt); + assert(insOptsNone(opt)); - /* Figure out the encoding format of the instruction */ + if (setFlags) // Can't encode SP with setFlags + { + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + assert(isGeneralRegisterOrSP(reg2)); - switch (ins) - { - case INS_bl_local: - case INS_b: - // Unconditional jump is a single form. - // Assume is long in case we cross hot/cold sections. - fmt = IF_BI_0A; - break; + // Is it just a mov? 
+ if (imm == 0) + { + emitIns_Mov(INS_mov, attr, reg1, reg2, /* canSkip */ true); + return; + } - case INS_beq: - case INS_bne: - case INS_bhs: - case INS_blo: - case INS_bmi: - case INS_bpl: - case INS_bvs: - case INS_bvc: - case INS_bhi: - case INS_bls: - case INS_bge: - case INS_blt: - case INS_bgt: - case INS_ble: - // Assume conditional jump is long. - fmt = IF_LARGEJMP; - break; + reg1 = encodingSPtoZR(reg1); + reg2 = encodingSPtoZR(reg2); + } - default: - unreached(); - break; + if (unsigned_abs(imm) <= 0x0fff) + { + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + assert(isValidUimm<12>(imm)); + fmt = IF_DI_2A; + } + else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding + { + // Encoding will use a 12-bit left shift of the immediate + opt = INS_OPTS_LSL12; + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + assert((imm & 0xfff) == 0); + imm >>= 12; + assert(isValidUimm<12>(imm)); + fmt = IF_DI_2A; + } + else + { + assert(!"Instruction cannot be encoded: IF_DI_2A"); + } } - instrDescJmp* id = emitNewInstrJmp(); + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSC(attr, imm); id->idIns(ins); id->idInsFmt(fmt); - id->idjShort = false; - -#ifdef DEBUG - // Mark the finally call - if (ins == INS_bl_local && emitComp->compCurBB->KindIs(BBJ_CALLFINALLY)) - { - id->idDebugOnlyInfo()->idFinallyCall = true; - } -#endif // DEBUG - - if (dst != nullptr) - { - id->idAddr()->iiaBBlabel = dst; + id->idInsOpt(opt); - // Skip unconditional jump that has a single form. - // The target needs to be relocated. 
- id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); + id->idReg1(reg1); + id->idReg2(reg2); -#ifdef DEBUG - if (emitComp->opts.compLongAddress) // Force long branches - { - id->idjKeepLong = true; - } -#endif // DEBUG - } - else + if (EA_IS_CNS_TLSGD_RELOC(attr)) { - id->idAddr()->iiaSetInstrCount(instrCount); - id->idjKeepLong = false; - /* This jump must be short */ - emitSetShortJump(id); - id->idSetIsBound(); + assert(imm != 0); + id->idSetTlsGD(); } - - /* Record the jump's IG and offset within it */ - - id->idjIG = emitCurIG; - id->idjOffs = emitCurIGsize; - - /* Append this jump to this IG's jump list */ - - id->idjNext = emitCurIGjmpList; - emitCurIGjmpList = id; - -#if EMITTER_STATS - emitTotalIGjmps++; -#endif - dispIns(id); appendToCurIG(id); } /***************************************************************************** * - * Add a call instruction (direct or indirect). - * argSize<0 means that the caller will pop the arguments - * - * The other arguments are interpreted depending on callType as shown: - * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. - * - * EC_FUNC_TOKEN : addr is the method address - * EC_FUNC_ADDR : addr is the absolute address of the function - * - * If callType is one of these emitCallTypes, addr has to be NULL. - * EC_INDIR_R : "call ireg". - * - * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA. - * - * Please consult the "debugger team notification" comment in genFnProlog(). + * Add an instruction referencing two registers and a floating point constant. 
*/ -void emitter::emitIns_Call(EmitCallType callType, - CORINFO_METHOD_HANDLE methHnd, - INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE - void* addr, - ssize_t argSize, - emitAttr retSize, - emitAttr secondRetSize, - VARSET_VALARG_TP ptrVars, - regMaskTP gcrefRegs, - regMaskTP byrefRegs, - const DebugInfo& di /* = DebugInfo() */, - regNumber ireg /* = REG_NA */, - regNumber xreg /* = REG_NA */, - unsigned xmul /* = 0 */, - ssize_t disp /* = 0 */, - bool isJump /* = false */) +void emitter::emitIns_R_R_F( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */) { - /* Sanity check the arguments depending on callType */ + // Currently, only SVE instructions use this format. + emitInsSve_R_R_F(ins, attr, reg1, reg2, immDbl, opt); +} - assert(callType < EC_COUNT); - assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); - assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); +/***************************************************************************** +* +* Add an instruction referencing two registers and a constant. +* Also checks for a large immediate that needs a second instruction +* and will load it in reg1 +* +* - Supports instructions: add, adds, sub, subs, and, ands, eor and orr +* - Requires that reg1 is a general register and not SP or ZR +* - Requires that reg1 != reg2 +*/ +void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm) +{ + assert(isGeneralRegister(reg1)); + assert(reg1 != reg2); - // ARM never uses these - assert(xreg == REG_NA && xmul == 0 && disp == 0); + bool immFits = true; - // Our stack level should be always greater than the bytes of arguments we push. Just - // a sanity test. 
- assert((unsigned)abs(argSize) <= codeGen->genStackLevel); + switch (ins) + { + case INS_add: + case INS_adds: + case INS_sub: + case INS_subs: + immFits = emitter::emitIns_valid_imm_for_add(imm, attr); + break; - // Trim out any callee-trashed registers from the live set. - regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); - gcrefRegs &= savedSet; - byrefRegs &= savedSet; + case INS_ands: + case INS_and: + case INS_eor: + case INS_orr: + immFits = emitter::emitIns_valid_imm_for_alu(imm, attr); + break; -#ifdef DEBUG - if (EMIT_GC_VERBOSE) - { - printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); - dumpConvertedVarSet(emitComp, ptrVars); - printf(", gcrefRegs="); - printRegMaskInt(gcrefRegs); - emitDispRegSet(gcrefRegs); - printf(", byrefRegs="); - printRegMaskInt(byrefRegs); - emitDispRegSet(byrefRegs); - printf("\n"); + default: + assert(!"Unsupported instruction in emitIns_R_R_Imm"); } -#endif - /* Managed RetVal: emit sequence point for the call */ - if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + if (immFits) { - codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + emitIns_R_R_I(ins, attr, reg1, reg2, imm); + } + else + { + // Load 'imm' into the reg1 register + // then issue: 'ins' reg1, reg2, reg1 + // + codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm); + emitIns_R_R_R(ins, attr, reg1, reg2, reg1); } +} - /* - We need to allocate the appropriate instruction descriptor based - on whether this is a direct/indirect call, and whether we need to - record an updated set of live GC variables. - */ - instrDesc* id; +/***************************************************************************** + * + * Add an instruction referencing three registers. 
+ */ + +void emitter::emitIns_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt /* = INS_OPTS_NONE */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +{ + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_mul: + case INS_smull: + case INS_umull: + if (insOptsAnyArrangement(opt)) + { + // ASIMD instruction + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidArrangement(size, opt)); + assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved + fmt = IF_DV_3A; + break; + } + // Base instruction + FALLTHROUGH; + + case INS_lsl: + case INS_lsr: + case INS_asr: + case INS_ror: + case INS_adc: + case INS_adcs: + case INS_sbc: + case INS_sbcs: + case INS_udiv: + case INS_sdiv: + case INS_mneg: + case INS_smnegl: + case INS_smulh: + case INS_umnegl: + case INS_umulh: + case INS_lslv: + case INS_lsrv: + case INS_asrv: + case INS_rorv: + case INS_crc32b: + case INS_crc32h: + case INS_crc32w: + case INS_crc32x: + case INS_crc32cb: + case INS_crc32ch: + case INS_crc32cw: + case INS_crc32cx: + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isGeneralRegister(reg3)); + fmt = IF_DR_3A; + break; + + case INS_add: + case INS_sub: + if (isVectorRegister(reg1)) + { + // ASIMD instruction + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(opt != INS_OPTS_1D); // Reserved encoding + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + fmt = IF_DV_3A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(size == EA_8BYTE); + fmt = IF_DV_3E; + } + break; + } + 
// Base instruction + FALLTHROUGH; - assert(argSize % REGSIZE_BYTES == 0); - int argCnt = (int)(argSize / (int)REGSIZE_BYTES); + case INS_adds: + case INS_subs: + emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, opt); + return; - if (callType == EC_INDIR_R) - { - /* Indirect call, virtual calls */ + case INS_cmeq: + case INS_cmge: + case INS_cmgt: + case INS_cmhi: + case INS_cmhs: + case INS_cmtst: + case INS_srshl: + case INS_sshl: + case INS_urshl: + case INS_ushl: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); - id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); - } - else - { - /* Helper/static/nonvirtual/function calls (direct or through handle), - and calls to an absolute addr. */ + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved + fmt = IF_DV_3A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(size == EA_8BYTE); // Only Int64/UInt64 supported + fmt = IF_DV_3E; + } + break; - assert(callType == EC_FUNC_TOKEN); + case INS_sqadd: + case INS_sqrshl: + case INS_sqshl: + case INS_sqsub: + case INS_uqadd: + case INS_uqrshl: + case INS_uqshl: + case INS_uqsub: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); - id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); - } + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved + fmt = IF_DV_3A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidVectorElemsize(size)); + fmt = IF_DV_3E; + } + break; - /* Update the emitter's live GC ref sets */ + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_frecps: + case INS_frsqrts: 
+ assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); - VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); - emitThisGCrefRegs = gcrefRegs; - emitThisByrefRegs = byrefRegs; + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert((elemsize == EA_8BYTE) || (elemsize == EA_4BYTE)); // Only Double/Float supported + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_3B; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert((size == EA_8BYTE) || (size == EA_4BYTE)); // Only Double/Float supported + fmt = IF_DV_3D; + } + break; - id->idSetIsNoGC(emitNoGChelper(methHnd)); + case INS_mla: + case INS_mls: + case INS_saba: + case INS_sabd: + case INS_shadd: + case INS_shsub: + case INS_smax: + case INS_smaxp: + case INS_smin: + case INS_sminp: + case INS_srhadd: + case INS_uaba: + case INS_uabd: + case INS_uhadd: + case INS_uhsub: + case INS_umax: + case INS_umaxp: + case INS_umin: + case INS_uminp: + case INS_urhadd: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidArrangement(size, opt)); + assert((opt != INS_OPTS_1D) && (opt != INS_OPTS_2D)); // The encoding size = 11, Q = x is reserved + fmt = IF_DV_3A; + break; - /* Set the instruction - special case jumping a function */ - instruction ins; - insFormat fmt = IF_NONE; + case INS_addp: + case INS_uzp1: + case INS_uzp2: + case INS_zip1: + case INS_zip2: + case INS_trn1: + case INS_trn2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = 0 is reserved + fmt = IF_DV_3A; + break; - /* Record the address: method, indirection, or funcptr */ + case INS_mov: + assert(isVectorRegister(reg1)); + 
assert(isVectorRegister(reg2)); + assert(reg2 == reg3); + assert(isValidVectorDatasize(size)); + // INS_mov is an alias for INS_orr (vector register) + if (opt == INS_OPTS_NONE) + { + elemsize = EA_1BYTE; + opt = optMakeArrangement(size, elemsize); + } + assert(isValidArrangement(size, opt)); + fmt = IF_DV_3C; + break; - if (callType == EC_INDIR_R) - { - /* This is an indirect call (either a virtual call or func ptr call) */ + case INS_and: + case INS_bic: + case INS_eor: + case INS_orr: + case INS_orn: + case INS_tbl: + case INS_tbl_2regs: + case INS_tbl_3regs: + case INS_tbl_4regs: + case INS_tbx: + case INS_tbx_2regs: + case INS_tbx_3regs: + case INS_tbx_4regs: + if (isVectorRegister(reg1)) + { + assert(isValidVectorDatasize(size)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (opt == INS_OPTS_NONE) + { + elemsize = EA_1BYTE; + opt = optMakeArrangement(size, elemsize); + } + assert(isValidArrangement(size, opt)); + fmt = IF_DV_3C; + break; + } + FALLTHROUGH; - if (isJump) - { - ins = INS_br_tail; // INS_br_tail Reg - } - else - { - ins = INS_blr; // INS_blr Reg - } - fmt = IF_BR_1B; + case INS_ands: + case INS_bics: + case INS_eon: + emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE); + return; - id->idIns(ins); - id->idInsFmt(fmt); + case INS_bsl: + case INS_bit: + case INS_bif: + assert(isValidVectorDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (opt == INS_OPTS_NONE) + { + elemsize = EA_1BYTE; + opt = optMakeArrangement(size, elemsize); + } + assert(isValidArrangement(size, opt)); + fmt = IF_DV_3C; + break; - assert(xreg == REG_NA); - if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(retSize)) - { - // For NativeAOT linux/arm64, we need to also record the relocation of methHnd. 
- // Since we do not have space to embed it in instrDesc, we store the register in - // reg1 and instead use the `iiaAdd` to store the method handle. Likewise, during - // emitOutputInstr, we retrieve the register from reg1 for this specific case. - id->idSetTlsGD(); - id->idReg1(ireg); - id->idAddr()->iiaAddr = (BYTE*)methHnd; - } - else - { - id->idReg3(ireg); - } - } - else - { - /* This is a simple direct call: "call helper/method/addr" */ + case INS_fadd: + case INS_fsub: + case INS_fdiv: + case INS_fmax: + case INS_fmaxnm: + case INS_fmin: + case INS_fminnm: + case INS_fabd: + case INS_fmul: + case INS_fmulx: + case INS_facge: + case INS_facgt: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_3B; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidScalarDatasize(size)); + fmt = IF_DV_3D; + } + break; - assert(callType == EC_FUNC_TOKEN); + case INS_fnmul: + // Scalar operation + assert(insOptsNone(opt)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidScalarDatasize(size)); + fmt = IF_DV_3D; + break; - assert(addr != NULL); + case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: - if (isJump) - { - ins = INS_b_tail; // INS_b_tail imm28 - } - else - { - ins = INS_bl; // INS_bl imm28 - } - fmt = IF_BI_0C; + case INS_fmla: + case INS_fmls: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub' - id->idIns(ins); - id->idInsFmt(fmt); + // Vector operation + 
assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_3B; + break; - id->idAddr()->iiaAddr = (BYTE*)addr; + case INS_ldr: + case INS_ldrb: + case INS_ldrh: + case INS_ldrsb: + case INS_ldrsh: + case INS_ldrsw: + case INS_str: + case INS_strb: + case INS_strh: + emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt); + return; - if (emitComp->opts.compReloc) - { - id->idSetIsDspReloc(); - } - } + case INS_ldp: + case INS_ldpsw: + case INS_ldnp: + case INS_stp: + case INS_stnp: + emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0); + return; -#ifdef DEBUG - if (EMIT_GC_VERBOSE) - { - if (id->idIsLargeCall()) - { - printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, - VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); - } - } -#endif + case INS_stxr: + case INS_stxrb: + case INS_stxrh: + case INS_stlxr: + case INS_stlxrb: + case INS_stlxrh: + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegisterOrSP(reg3)); + fmt = IF_LS_3D; + break; - if (m_debugInfoSize > 0) - { - INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); - id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token - } + case INS_casb: + case INS_casab: + case INS_casalb: + case INS_caslb: + case INS_cash: + case INS_casah: + case INS_casalh: + case INS_caslh: + case INS_cas: + case INS_casa: + case INS_casal: + case INS_casl: + case INS_ldaddb: + case INS_ldaddab: + case INS_ldaddalb: + case INS_ldaddlb: + case INS_ldaddh: + case INS_ldaddah: + case INS_ldaddalh: + case INS_ldaddlh: + case INS_ldadd: + case INS_ldadda: + case INS_ldaddal: + case INS_ldaddl: + case INS_ldclral: + case INS_ldsetal: + case INS_swpb: + case INS_swpab: + case INS_swpalb: + case INS_swplb: + case INS_swph: + case INS_swpah: + case INS_swpalh: + case INS_swplh: + case 
INS_swp: + case INS_swpa: + case INS_swpal: + case INS_swpl: + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegisterOrSP(reg3)); + fmt = IF_LS_3E; + break; -#ifdef LATE_DISASM - if (addr != nullptr) - { - codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); - } -#endif // LATE_DISASM + case INS_sha256h: + case INS_sha256h2: + case INS_sha256su1: + case INS_sha1su0: + case INS_sha1c: + case INS_sha1p: + case INS_sha1m: + assert(isValidVectorDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (opt == INS_OPTS_NONE) + { + elemsize = EA_4BYTE; + opt = optMakeArrangement(size, elemsize); + } + assert(isValidArrangement(size, opt)); + fmt = IF_DV_3F; + break; - dispIns(id); - appendToCurIG(id); - emitLastMemBarrier = nullptr; // Cannot optimize away future memory barriers -} + case INS_ld2: + case INS_ld3: + case INS_ld4: + case INS_st2: + case INS_st3: + case INS_st4: + assert(opt != INS_OPTS_1D); // .1D format only permitted with LD1 & ST1 + FALLTHROUGH; -/***************************************************************************** - * - * Returns true if 'imm' is valid Cond encoding - */ + case INS_ld1: + case INS_ld1_2regs: + case INS_ld1_3regs: + case INS_ld1_4regs: + case INS_st1: + case INS_st1_2regs: + case INS_st1_3regs: + case INS_st1_4regs: + case INS_ld1r: + case INS_ld2r: + case INS_ld3r: + case INS_ld4r: + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidArrangement(size, opt)); -/*static*/ bool emitter::isValidImmCond(ssize_t imm) -{ - // range check the ssize_t value, to make sure it is a small unsigned value - // and that only the bits in the cfi.cond are set - if ((imm < 0) || (imm > 0xF)) - return false; + // Load/Store multiple structures post-indexed by a register + // Load single structure and replicate post-indexed by a register + reg2 = 
encodingSPtoZR(reg2); + fmt = IF_LS_3F; + break; - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; + case INS_addhn: + case INS_raddhn: + case INS_rsubhn: + case INS_subhn: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_8BYTE); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_1D); // The encoding size = 11, Q = x is reserved. + fmt = IF_DV_3A; + break; - return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). -} + case INS_addhn2: + case INS_raddhn2: + case INS_rsubhn2: + case INS_subhn2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_16BYTE); + assert(isValidArrangement(size, opt)); + assert(opt != INS_OPTS_2D); // The encoding size = 11, Q = x is reserved. + fmt = IF_DV_3A; + break; -/***************************************************************************** - * - * Returns true if 'imm' is valid Cond/Flags encoding - */ + case INS_sabal: + case INS_sabdl: + case INS_saddl: + case INS_saddw: + case INS_smlal: + case INS_smlsl: + case INS_ssubl: + case INS_ssubw: + case INS_uabal: + case INS_uabdl: + case INS_uaddl: + case INS_uaddw: + case INS_umlal: + case INS_umlsl: + case INS_usubl: + case INS_usubw: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); + fmt = IF_DV_3A; + break; -/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm) -{ - // range check the ssize_t value, to make sure it is a small unsigned value - // and that only the bits in the cfi.cond or cfi.flags are set - if ((imm < 0) || (imm > 0xFF)) - return false; + case INS_sabal2: + case INS_sabdl2: + case INS_saddl2: + case INS_saddw2: + case INS_smlal2: + case INS_smlsl2: + case INS_ssubl2: + case INS_ssubw2: + case INS_umlal2: + case INS_umlsl2: + case 
INS_smull2: + case INS_uabal2: + case INS_uabdl2: + case INS_uaddl2: + case INS_uaddw2: + case INS_usubl2: + case INS_umull2: + case INS_usubw2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_16BYTE); + assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); + fmt = IF_DV_3A; + break; - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; + case INS_sqdmlal: + case INS_sqdmlsl: + case INS_sqdmull: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); + fmt = IF_DV_3A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert((size == EA_2BYTE) || (size == EA_4BYTE)); + fmt = IF_DV_3E; + } + break; - return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). -} + case INS_sqdmulh: + case INS_sqrdmlah: + case INS_sqrdmlsh: + case INS_sqrdmulh: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + elemsize = optGetElemsize(opt); + assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); + fmt = IF_DV_3A; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert((size == EA_2BYTE) || (size == EA_4BYTE)); + fmt = IF_DV_3E; + } + break; -/***************************************************************************** - * - * Returns true if 'imm' is valid Cond/Flags/Imm5 encoding - */ + case INS_sqdmlal2: + case INS_sqdmlsl2: + case INS_sqdmull2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_16BYTE); + assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); + fmt = IF_DV_3A; + break; -/*static*/ bool 
emitter::isValidImmCondFlagsImm5(ssize_t imm) -{ - // range check the ssize_t value, to make sure it is a small unsigned value - // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set - if ((imm < 0) || (imm > 0x1FFF)) - return false; + case INS_pmul: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidArrangement(size, opt)); + assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B)); + fmt = IF_DV_3A; + break; - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; + case INS_pmull: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_1D)); + fmt = IF_DV_3A; + break; - return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). -} + case INS_pmull2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_16BYTE); + assert((opt == INS_OPTS_16B) || (opt == INS_OPTS_2D)); + fmt = IF_DV_3A; + break; -/***************************************************************************** - * - * Return an encoding for the specified 'V' register used in '9' thru '6' position with the times two encoding. - * This encoding requires that the register number be divisible by two. - */ + case INS_sdot: + case INS_udot: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S))); + fmt = IF_DV_3A; + break; -/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6_Times_Two(regNumber reg) -{ - assert(isVectorRegister(reg)); - emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; - assert(ureg % 2 == 0); - ureg /= 2u; - assert((ureg >= 0) && (ureg <= 31)); - return ureg << 6; -} + default: + // fallback to emit SVE instructions. 
+ return emitInsSve_R_R_R(ins, attr, reg1, reg2, reg3, opt, sopt); -/***************************************************************************** - * - * Returns an encoding for the specified condition code. - */ + } // end switch (ins) -/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond) -{ - emitter::code_t uimm = (emitter::code_t)cond; - return uimm << 12; -} + assert(fmt != IF_NONE); -/***************************************************************************** - * - * Returns an encoding for the condition code with the lowest bit inverted (marked by invert() in the - * architecture manual). - */ + instrDesc* id = emitNewInstr(attr); -/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond) -{ - emitter::code_t uimm = (emitter::code_t)cond; - uimm ^= 1; // invert the lowest bit - return uimm << 12; -} + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); -/***************************************************************************** - * - * Returns an encoding for the specified flags. - */ + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); -/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags) -{ - emitter::code_t uimm = (emitter::code_t)flags; - return uimm; + dispIns(id); + appendToCurIG(id); } -/***************************************************************************** - * - * Returns the encoding for the Shift Count bits to be used for Arm64 encodings - */ - -/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size) +//----------------------------------------------------------------------------------- +// emitIns_R_R_R_I_LdStPair: Add an instruction storing 2 registers into a memory +// (pointed by reg3) and the offset (immediate). 
+// +// Arguments: +// ins - The instruction code +// attr - The emit attribute for register 1 +// attr2 - The emit attribute for register 2 +// reg1 - Register 1 +// reg2 - Register 2 +// reg3 - Register 3 +// imm - Immediate offset, prior to scaling by operand size +// varx1 - LclVar number 1 +// varx2 - LclVar number 2 +// offs1 - Memory offset of lclvar number 1 +// offs2 - Memory offset of lclvar number 2 +// +void emitter::emitIns_R_R_R_I_LdStPair(instruction ins, + emitAttr attr, + emitAttr attr2, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + int varx1, + int varx2, + int offs1, + int offs2 DEBUG_ARG(unsigned var1RefsOffs) DEBUG_ARG(unsigned var2RefsOffs)) { - assert((imm & 0x003F) == imm); - assert(((imm & 0x0020) == 0) || (size == EA_8BYTE)); + assert((ins == INS_stp) || (ins == INS_ldp)); + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + unsigned scale = 0; - return (emitter::code_t)imm << 10; -} + // Is the target a vector register? + if (isVectorRegister(reg1)) + { + assert(isValidVectorLSPDatasize(size)); + assert(isVectorRegister(reg2)); -/***************************************************************************** - * - * Returns the encoding to select a 64-bit datasize for an Arm64 instruction - */ + scale = NaturalScale_helper(size); + assert((scale >= 2) && (scale <= 4)); + } + else + { + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg2)); + scale = (size == EA_8BYTE) ? 
3 : 2; + } -/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size) -{ - if (size == EA_8BYTE) + reg3 = encodingSPtoZR(reg3); + + fmt = IF_LS_3C; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) { - return 0x80000000; // set the bit at location 31 + fmt = IF_LS_3B; } else { - assert(size == EA_4BYTE); - return 0; + if ((imm & mask) == 0) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + // Unlike emitIns_S_S_R_R(), we would never come here when + // (imm & mask) != 0. + unreached(); + } } -} -/***************************************************************************** - * - * Returns the encoding to select the datasize for the general load/store Arm64 instructions - * - */ + bool validVar1 = varx1 != -1; + bool validVar2 = varx2 != -1; -/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size) -{ - bool exclusive = ((code & 0x35000000) == 0); - bool atomic = ((code & 0x31200C00) == 0x30200000); + instrDesc* id; - if ((code & 0x00800000) && !exclusive && !atomic) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb) + if (validVar1 && validVar2) { - if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ? - { - if (EA_SIZE(size) != EA_8BYTE) // Do we need to encode the 32-bit Rt size bit? - { - return 0x00400000; // set the bit at location 22 - } - } + id = emitNewInstrLclVarPair(attr, imm); + id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); + id->idSetIsLclVar(); + + emitGetLclVarPairLclVar2(id)->initLclVarAddr(varx2, offs2); } - else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode? + else { - if (EA_SIZE(size) == EA_8BYTE) // Do we need to encode the 64-bit size bit? 
+ id = emitNewInstrCns(attr, imm); + if (validVar1) { - return 0x40000000; // set the bit at location 30 + id->idAddr()->iiaLclVar.initLclVarAddr(varx1, offs1); + id->idSetIsLclVar(); + } + if (validVar2) + { + id->idAddr()->iiaLclVar.initLclVarAddr(varx2, offs2); + id->idSetIsLclVar(); } } - return 0; + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) + { + id->idGCrefReg2(GCT_GCREF); + } + else if (EA_IS_BYREF(attr2)) + { + id->idGCrefReg2(GCT_BYREF); + } + else + { + id->idGCrefReg2(GCT_NONE); + } + +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = var1RefsOffs; + id->idDebugOnlyInfo()->idVarRefOffs2 = var2RefsOffs; +#endif + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to select the datasize for the vector load/store Arm64 instructions - * + * Add an instruction referencing three registers and a constant. 
*/ -/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size) +void emitter::emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + emitAttr attrReg2 /* = EA_UNKNOWN */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { - code_t result = 0; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool isLdSt = false; + bool isSIMD = false; + bool isAddSub = false; + bool setFlags = false; + unsigned scale = 0; - // Check bit 29 - if ((code & 0x20000000) == 0) + /* Figure out the encoding format of the instruction */ + switch (ins) { - // LDR literal + case INS_extr: + assert(insOptsNone(opt)); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidImmShift(imm, size)); + fmt = IF_DR_3E; + break; - if (size == EA_16BYTE) - { - // set the operation size in bit 31 - result = 0x80000000; - } - else if (size == EA_8BYTE) - { - // set the operation size in bit 30 - result = 0x40000000; - } - else - { - assert(size == EA_4BYTE); - // no bits are set - result = 0x00000000; - } - } - else - { - // LDR non-literal + case INS_and: + case INS_ands: + case INS_eor: + case INS_orr: + case INS_bic: + case INS_bics: + case INS_eon: + case INS_orn: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isValidImmShift(imm, size)); + if (imm == 0) + { + assert(insOptsNone(opt)); // a zero imm, means no shift kind + fmt = IF_DR_3A; + } + else + { + assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind + fmt = IF_DR_3B; + } + break; - if (size == EA_16BYTE) - { - // The operation size in bits 31 and 30 are zero - // Bit 23 specifies a 128-bit Load/Store - result = 0x00800000; - } - 
else if (size == EA_8BYTE) - { - // set the operation size in bits 31 and 30 - result = 0xC0000000; - } - else if (size == EA_4BYTE) - { - // set the operation size in bit 31 - result = 0x80000000; - } - else if (size == EA_2BYTE) - { - // set the operation size in bit 30 - result = 0x40000000; - } - else - { - assert(size == EA_1BYTE); - // The operation size in bits 31 and 30 are zero - result = 0x00000000; - } - } + case INS_fmul: // by element, imm[0..3] selects the element of reg3 + case INS_fmla: + case INS_fmls: + case INS_fmulx: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorElemsizeFloat(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + assert(opt != INS_OPTS_1D); // Reserved encoding + fmt = IF_DV_3BI; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert(isValidScalarDatasize(size)); + elemsize = size; + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_3DI; + } + break; + + case INS_mul: // by element, imm[0..7] selects the element of reg3 + case INS_mla: + case INS_mls: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + // Vector operation + assert(insOptsAnyArrangement(opt)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + elemsize = optGetElemsize(opt); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + // Only has encodings for H or S elemsize + assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); + // Only has encodings for V0..V15 + if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) + { + noway_assert(!"Invalid reg3"); + } + fmt = IF_DV_3AI; + break; - // Or in bit 26 to indicate a Vector register 
is used as 'target' - result |= 0x04000000; + case INS_add: + case INS_sub: + setFlags = false; + isAddSub = true; + break; - return result; -} + case INS_adds: + case INS_subs: + setFlags = true; + isAddSub = true; + break; -/***************************************************************************** - * - * Returns the encoding to select the datasize for the vector load/store Arm64 instructions - * - */ + case INS_ldpsw: + scale = 2; + isLdSt = true; + break; -/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size) -{ - code_t result = 0; + case INS_ldnp: + case INS_stnp: + assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions + FALLTHROUGH; - if (size == EA_16BYTE) - { - // The operation size in bits 31 and 30 are zero - // Bit 23 specifies a 128-bit Load/Store - result = 0x80000000; - } - else if (size == EA_8BYTE) - { - // set the operation size in bits 31 and 30 - result = 0x40000000; - } - else if (size == EA_4BYTE) - { - // set the operation size in bit 31 - result = 0x00000000; - } + case INS_ldp: + case INS_stp: + // Is the target a vector register? + if (isVectorRegister(reg1)) + { + scale = NaturalScale_helper(size); + isSIMD = true; + } + else + { + scale = (size == EA_8BYTE) ? 
3 : 2; + } + isLdSt = true; + fmt = IF_LS_3C; + break; - // Or in bit 26 to indicate a Vector register is used as 'target' - result |= 0x04000000; + case INS_ld1: + case INS_ld2: + case INS_ld3: + case INS_ld4: + case INS_st1: + case INS_st2: + case INS_st3: + case INS_st4: + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); + assert(isGeneralRegister(reg3)); - return result; -} + assert(insOptsPostIndex(opt)); -/***************************************************************************** - * - * Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction - * - */ + elemsize = size; + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); -/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size) -{ - // is bit 30 equal to 0? - if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw - { - if (size == EA_8BYTE) // Do we need to set the sf and N bits? 
- { - return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22 - } - } - return 0; // don't set any bits -} + // Load/Store single structure post-indexed by a register + reg2 = encodingSPtoZR(reg2); + fmt = IF_LS_3G; + break; -/***************************************************************************** - * - * Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction - */ + case INS_ext: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isValidVectorDatasize(size)); + assert(isValidArrangement(size, opt)); + assert((opt == INS_OPTS_8B) || (opt == INS_OPTS_16B)); + assert(isValidVectorIndex(size, EA_1BYTE, imm)); + fmt = IF_DV_3G; + break; -/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size) -{ - if (size == EA_16BYTE) - { - return 0x40000000; // set the bit at location 30 - } - else - { - assert(size == EA_8BYTE); - return 0; - } -} + case INS_smlal: + case INS_smlsl: + case INS_smull: + case INS_umlal: + case INS_umlsl: + case INS_umull: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); + elemsize = optGetElemsize(opt); + // Restricted to V0-V15 when element size is H. 
+ if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) + { + assert(!"Invalid reg3"); + } + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + fmt = IF_DV_3AI; + break; -/***************************************************************************** - * - * Returns the encoding to select 'index' for an Arm64 vector elem instruction - */ -/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index) -{ - code_t bits = (code_t)index; - if (elemsize == EA_1BYTE) - { - bits <<= 1; - bits |= 1; - } - else if (elemsize == EA_2BYTE) - { - bits <<= 2; - bits |= 2; - } - else if (elemsize == EA_4BYTE) - { - bits <<= 3; - bits |= 4; - } - else - { - assert(elemsize == EA_8BYTE); - bits <<= 4; - bits |= 8; - } - assert((bits >= 1) && (bits <= 0x1f)); + case INS_sqdmlal: + case INS_sqdmlsl: + case INS_sqdmull: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(size == EA_8BYTE); + assert((opt == INS_OPTS_4H) || (opt == INS_OPTS_2S)); + elemsize = optGetElemsize(opt); + fmt = IF_DV_3AI; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert((size == EA_2BYTE) || (size == EA_4BYTE)); + elemsize = size; + fmt = IF_DV_3EI; + } + // Restricted to V0-V15 when element size is H. 
+ if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) + { + assert(!"Invalid reg3"); + } + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + break; + + case INS_sqdmulh: + case INS_sqrdmlah: + case INS_sqrdmlsh: + case INS_sqrdmulh: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsAnyArrangement(opt)) + { + // Vector operation + assert(isValidVectorDatasize(size)); + elemsize = optGetElemsize(opt); + assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); + fmt = IF_DV_3AI; + } + else + { + // Scalar operation + assert(insOptsNone(opt)); + assert((size == EA_2BYTE) || (size == EA_4BYTE)); + elemsize = size; + fmt = IF_DV_3EI; + } + // Restricted to V0-V15 when element size is H. + if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) + { + assert(!"Invalid reg3"); + } + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + break; - return (bits << 16); // bits at locations [20,19,18,17,16] -} + case INS_smlal2: + case INS_smlsl2: + case INS_smull2: + case INS_sqdmlal2: + case INS_sqdmlsl2: + case INS_sqdmull2: + case INS_umlal2: + case INS_umlsl2: + case INS_umull2: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(size == EA_16BYTE); + assert((opt == INS_OPTS_8H) || (opt == INS_OPTS_4S)); + elemsize = optGetElemsize(opt); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + // Restricted to V0-V15 when element size is H + if ((elemsize == EA_2BYTE) && ((genRegMask(reg3) & RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS) == 0)) + { + assert(!"Invalid reg3"); + } + fmt = IF_DV_3AI; + break; -/***************************************************************************** - * - * Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction - */ -/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t 
index2) -{ - code_t bits = (code_t)index2; - if (elemsize == EA_1BYTE) - { - // bits are correct - } - else if (elemsize == EA_2BYTE) - { - bits <<= 1; - } - else if (elemsize == EA_4BYTE) - { - bits <<= 2; - } - else - { - assert(elemsize == EA_8BYTE); - bits <<= 3; - } - assert((bits >= 0) && (bits <= 0xf)); + case INS_sdot: + case INS_udot: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(((size == EA_8BYTE) && (opt == INS_OPTS_2S)) || ((size == EA_16BYTE) && (opt == INS_OPTS_4S))); + assert(isValidVectorIndex(EA_16BYTE, EA_4BYTE, imm)); + fmt = IF_DV_3AI; + break; - return (bits << 11); // bits at locations [14,13,12,11] -} + default: + // fallback to emit SVE instructions. + return emitInsSve_R_R_R_I(ins, attr, reg1, reg2, reg3, imm, opt, sopt); -/***************************************************************************** - * - * Returns the encoding to select the 'index' for an Arm64 'mul' by element instruction - */ -/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index) -{ - code_t bits = 0; + } // end switch (ins) - if (elemsize == EA_2BYTE) + assert(insScalableOptsNone(sopt)); + + if (isLdSt) { - assert((index >= 0) && (index <= 7)); - if (index & 0x4) + assert(!isAddSub); + assert(isGeneralRegisterOrSP(reg3)); + assert(insOptsNone(opt) || insOptsIndexed(opt)); + + if (isSIMD) { - bits |= (1 << 11); // set bit 11 'H' + assert(isValidVectorLSPDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert((scale >= 2) && (scale <= 4)); } - if (index & 0x2) + else { - bits |= (1 << 21); // set bit 21 'L' + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert((scale == 2) || (scale == 3)); } - if (index & 0x1) + + // Load/Store Pair reserved encodings: + if (emitInsIsLoad(ins)) { - bits |= (1 << 20); // set bit 20 'M' + assert(reg1 != reg2); } - } - else if 
(elemsize == EA_4BYTE) - { - assert((index >= 0) && (index <= 3)); - if (index & 0x2) + if (insOptsIndexed(opt)) { - bits |= (1 << 11); // set bit 11 'H' + assert(reg1 != reg3); + assert(reg2 != reg3); } - if (index & 0x1) + + reg3 = encodingSPtoZR(reg3); + + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) { - bits |= (1 << 21); // set bit 21 'L' - } - } - else - { - assert(!"Invalid 'elemsize' value"); - } + assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero - return bits; -} + fmt = IF_LS_3B; + } + else + { + if ((imm & mask) == 0) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st -/***************************************************************************** - * - * Returns the encoding for a shift instruction, ready for insertion into an instruction. - */ -/*static*/ emitter::code_t emitter::insEncodeShiftImmediate(emitAttr size, bool isRightShift, ssize_t shiftAmount) -{ - if (isRightShift) - { - // The right shift amount must be in the range 1 to the destination element width in bits. - assert((shiftAmount > 0) && (shiftAmount <= getBitWidth(size))); - return (code_t)(2 * getBitWidth(size) - shiftAmount); + if ((imm >= -64) && (imm <= 63)) + { + fmt = IF_LS_3C; + } + } +#ifdef DEBUG + if (fmt != IF_LS_3C) + { + assert(!"Instruction cannot be encoded: IF_LS_3C"); + } +#endif + } } - else + else if (isAddSub) { - // The left shift amount must in the range 0 to the element width in bits minus 1. 
- assert(shiftAmount < getBitWidth(size)); - return (code_t)(getBitWidth(size) + shiftAmount); - } -} + bool reg2IsSP = (reg2 == REG_SP); + assert(!isLdSt); + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg3)); -/***************************************************************************** - * - * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction - */ + if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option + { + assert(isGeneralRegisterOrZR(reg1)); + } + else + { + assert(isGeneralRegisterOrSP(reg1)); + reg1 = encodingSPtoZR(reg1); + } -/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size) -{ - if (size == EA_8BYTE) - { - return 0x00C00000; // set the bit at location 23 and 22 - } - else if (size == EA_4BYTE) - { - return 0x00800000; // set the bit at location 23 - } - else if (size == EA_2BYTE) - { - return 0x00400000; // set the bit at location 22 - } - assert(size == EA_1BYTE); - return 0x00000000; -} + if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option + { + assert(isGeneralRegister(reg2)); + } + else + { + assert(isGeneralRegisterOrSP(reg2)); + reg2 = encodingSPtoZR(reg2); + } -/***************************************************************************** - * - * Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction - */ + if (insOptsAnyExtend(opt)) + { + assert((imm >= 0) && (imm <= 4)); -/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size) -{ - if (size == EA_8BYTE) - { - return 0x00400000; // set the bit at location 22 - } - assert(size == EA_4BYTE); - return 0x00000000; -} + fmt = IF_DR_3C; + } + else if (insOptsAluShift(opt)) + { + // imm should be non-zero and in [1..63] + assert(isValidImmShift(imm, size) && (imm != 0)); + fmt = IF_DR_3B; + } + else if (imm == 0) + { + assert(insOptsNone(opt)); -// Returns the encoding to select the index for an Arm64 float 
vector by element instruction -/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index) -{ - code_t result = 0x00000000; - if (elemsize == EA_8BYTE) - { - assert((index >= 0) && (index <= 1)); - if (index == 1) + if (reg2IsSP) + { + // To encode the SP register as reg2 we must use the IF_DR_3C encoding + // and also specify a LSL of zero (imm == 0) + opt = INS_OPTS_LSL; + fmt = IF_DR_3C; + } + else + { + fmt = IF_DR_3A; + } + } + else { - result |= 0x00000800; // 'H' - set the bit at location 11 + assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A"); } } - else + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + + // Record the attribute for the second register in the pair + id->idGCrefReg2(GCT_NONE); + if (attrReg2 != EA_UNKNOWN) { - assert(elemsize == EA_4BYTE); - assert((index >= 0) && (index <= 3)); - if (index & 2) + // Record the attribute for the second register in the pair + assert((fmt == IF_LS_3B) || (fmt == IF_LS_3C)); + if (EA_IS_GCREF(attrReg2)) { - result |= 0x00000800; // 'H' - set the bit at location 11 + id->idGCrefReg2(GCT_GCREF); } - if (index & 1) + else if (EA_IS_BYREF(attrReg2)) { - result |= 0x00200000; // 'L' - set the bit at location 21 + id->idGCrefReg2(GCT_BYREF); } } - return result; + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to select the vector elemsize for an Arm64 ld/st# vector instruction + * Add an instruction referencing three registers and two constants. 
*/ -/*static*/ emitter::code_t emitter::insEncodeVLSElemsize(emitAttr size) +void emitter::emitIns_R_R_R_I_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + ssize_t imm1, + ssize_t imm2, + insOpts opt) { - code_t result = 0x00000000; + // Currently, only SVE instructions use this format. + emitInsSve_R_R_R_I_I(ins, attr, reg1, reg2, reg3, imm1, imm2, opt); +} - switch (size) +/***************************************************************************** + * + * Add an instruction referencing three registers, with an extend option + */ + +void emitter::emitIns_R_R_R_Ext(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts opt, /* = INS_OPTS_NONE */ + int shiftAmount) /* = -1 -- unset */ +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + bool isSIMD = false; + int scale = -1; + + /* Figure out the encoding format of the instruction */ + switch (ins) { - case EA_1BYTE: - { - result |= 0x0000; // clear bits 10 and 11 + case INS_ldrb: + case INS_ldrsb: + case INS_strb: + scale = 0; break; - } - case EA_2BYTE: - { - result |= 0x0400; // set bit at location 10, clear bit at location 11 + case INS_ldrh: + case INS_ldrsh: + case INS_strh: + scale = 1; break; - } - case EA_4BYTE: - { - result |= 0x0800; // clear bit at location 10, set bit at location 11 + case INS_ldrsw: + scale = 2; break; - } - case EA_8BYTE: - { - result |= 0x0C00; // set bits at location 10 and 11 + case INS_ldr: + case INS_str: + // Is the target a vector register? + if (isVectorRegister(reg1)) + { + assert(isValidVectorLSDatasize(size)); + scale = NaturalScale_helper(size); + isSIMD = true; + } + else + { + assert(isValidGeneralDatasize(size)); + scale = (size == EA_8BYTE) ? 
3 : 2; + } + break; - } default: - { - assert(!"Invalid element size"); + unreached(); break; - } + + } // end switch (ins) + + assert(scale != -1); + assert(insOptsLSExtend(opt)); + + if (isSIMD) + { + assert(isValidVectorLSDatasize(size)); + assert(isVectorRegister(reg1)); + } + else + { + assert(isValidGeneralLSDatasize(size)); + assert(isGeneralRegisterOrZR(reg1)); } - return result; + assert(isGeneralRegisterOrSP(reg2)); + assert(isGeneralRegister(reg3)); + + // Load/Store reserved encodings: + if (insOptsIndexed(opt)) + { + assert(reg1 != reg2); + } + + if (shiftAmount == -1) + { + shiftAmount = insOptsLSL(opt) ? scale : 0; + } + + assert((shiftAmount == scale) || (shiftAmount == 0)); + + reg2 = encodingSPtoZR(reg2); + fmt = IF_LS_3A; + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idReg3Scaled(shiftAmount == scale); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to select the index for an Arm64 ld/st# vector by element instruction + * Add an instruction referencing two registers and two constants. */ -/*static*/ emitter::code_t emitter::insEncodeVLSIndex(emitAttr size, ssize_t index) +void emitter::emitIns_R_R_I_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt) { - code_t result = 0x00000000; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc - switch (size) + /* Figure out the encoding format of the instruction */ + switch (ins) { - case EA_1BYTE: - { - // Q = ? - bit location 30 - // xx = 00 - bit location 14 and 15 - // S = ? 
- bit location 12 - // ss = ?0 - bit location 10 and 11 + int lsb; + int width; + bitMaskImm bmi; + unsigned registerListSize; - result |= (index & 0x8) << 27; - result |= (index & 0x4) << 10; - result |= (index & 0x3) << 10; + case INS_bfm: + case INS_sbfm: + case INS_ubfm: + assert(isGeneralRegister(reg1)); + assert((ins == INS_bfm) ? isGeneralRegisterOrZR(reg2) : isGeneralRegister(reg2)); + assert(isValidImmShift(imm1, size)); + assert(isValidImmShift(imm2, size)); + assert(insOptsNone(opt)); + bmi.immNRS = 0; + bmi.immN = (size == EA_8BYTE); + bmi.immR = imm1; + bmi.immS = imm2; + immOut = bmi.immNRS; + fmt = IF_DI_2D; break; - } - case EA_2BYTE: - { - // Q = ? - bit location 30 - // xx = 01 - bit location 14 and 15 - // S = ? - bit location 12 - // ss = ?? - bit location 10 and 11 + case INS_bfi: + case INS_sbfiz: + case INS_ubfiz: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + lsb = getBitWidth(size) - imm1; + width = imm2 - 1; + assert(isValidImmShift(lsb, size)); + assert(isValidImmShift(width, size)); + assert(insOptsNone(opt)); + bmi.immNRS = 0; + bmi.immN = (size == EA_8BYTE); + bmi.immR = lsb; + bmi.immS = width; + immOut = bmi.immNRS; + fmt = IF_DI_2D; + break; - result |= (index & 0x4) << 28; - result |= 0x4000; - result |= (index & 0x2) << 11; - result |= (index & 0x1) << 11; + case INS_bfxil: + case INS_sbfx: + case INS_ubfx: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + lsb = imm1; + width = imm2 + imm1 - 1; + assert(isValidImmShift(lsb, size)); + assert(isValidImmShift(width, size)); + assert(insOptsNone(opt)); + bmi.immNRS = 0; + bmi.immN = (size == EA_8BYTE); + bmi.immR = imm1; + bmi.immS = imm2 + imm1 - 1; + immOut = bmi.immNRS; + fmt = IF_DI_2D; + break; + + case INS_mov: + case INS_ins: + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + elemsize = size; + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1)); + 
assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2)); + assert(insOptsNone(opt)); + immOut = (imm1 << 4) + imm2; + fmt = IF_DV_2F; break; - } - case EA_4BYTE: - { - // Q = ? - bit location 30 - // xx = 10 - bit location 14 and 15 - // S = ? - bit location 12 - // ss = 00 - bit location 10 and 11 + case INS_ld1: + case INS_ld2: + case INS_ld3: + case INS_ld4: + case INS_st1: + case INS_st2: + case INS_st3: + case INS_st4: + assert(isVectorRegister(reg1)); + assert(isGeneralRegisterOrSP(reg2)); - result |= (index & 0x2) << 29; - result |= 0x8000; - result |= (index & 0x1) << 12; - break; - } + elemsize = size; + assert(isValidVectorElemsize(elemsize)); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1)); - case EA_8BYTE: - { - // Q = ? - bit location 30 - // xx = 10 - bit location 14 and 15 - // S = 0 - bit location 12 - // ss = 01 - bit location 10 and 11 + registerListSize = insGetRegisterListSize(ins); + assert((elemsize * registerListSize) == (unsigned)imm2); + assert(insOptsPostIndex(opt)); - result |= (index & 0x1) << 30; - result |= 0x8400; + // Load/Store single structure post-indexed by an immediate + reg2 = encodingSPtoZR(reg2); + immOut = imm1; + fmt = IF_LS_2G; break; - } default: - { - assert(!"Invalid element size"); + unreached(); break; - } } + assert(fmt != IF_NONE); - return result; + instrDesc* id = emitNewInstrSC(attr, immOut); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to select the fcvt operation for Arm64 instructions + * Add an instruction referencing four registers. 
*/ -/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion) -{ - code_t result = 0; - switch (conversion) - { - case INS_OPTS_S_TO_D: // Single to Double - assert(fmt == IF_DV_2J); - result = 0x00008000; // type=00, opc=01 - break; - - case INS_OPTS_D_TO_S: // Double to Single - assert(fmt == IF_DV_2J); - result = 0x00400000; // type=01, opc=00 - break; - - case INS_OPTS_H_TO_S: // Half to Single - assert(fmt == IF_DV_2J); - result = 0x00C00000; // type=11, opc=00 - break; - - case INS_OPTS_H_TO_D: // Half to Double - assert(fmt == IF_DV_2J); - result = 0x00C08000; // type=11, opc=01 - break; - - case INS_OPTS_S_TO_H: // Single to Half - assert(fmt == IF_DV_2J); - result = 0x00018000; // type=00, opc=11 - break; - - case INS_OPTS_D_TO_H: // Double to Half - assert(fmt == IF_DV_2J); - result = 0x00418000; // type=01, opc=11 - break; - case INS_OPTS_S_TO_4BYTE: // Single to INT32 - assert(fmt == IF_DV_2H); - result = 0x00000000; // sf=0, type=00 - break; +void emitter::emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt /* = INS_OPTS_NONE*/, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; - case INS_OPTS_D_TO_4BYTE: // Double to INT32 - assert(fmt == IF_DV_2H); - result = 0x00400000; // sf=0, type=01 + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_madd: + case INS_msub: + case INS_smaddl: + case INS_smsubl: + case INS_umaddl: + case INS_umsubl: + assert(isValidGeneralDatasize(size)); + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_DR_4A; break; - case INS_OPTS_S_TO_8BYTE: // Single to INT64 - assert(fmt == IF_DV_2H); - result = 0x80000000; // sf=1, type=00 + case INS_fmadd: + case INS_fmsub: + case 
INS_fnmadd: + case INS_fnmsub: + // Scalar operation + assert(isValidScalarDatasize(size)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_DV_4A; break; - case INS_OPTS_D_TO_8BYTE: // Double to INT64 - assert(fmt == IF_DV_2H); - result = 0x80400000; // sf=1, type=01 + case INS_invalid: + fmt = IF_NONE; break; - case INS_OPTS_4BYTE_TO_S: // INT32 to Single - assert(fmt == IF_DV_2I); - result = 0x00000000; // sf=0, type=00 - break; + // Fallback handles emitting the SVE instructions. + default: + return emitInsSve_R_R_R_R(ins, attr, reg1, reg2, reg3, reg4, opt, sopt); + } + assert(fmt != IF_NONE); - case INS_OPTS_4BYTE_TO_D: // INT32 to Double - assert(fmt == IF_DV_2I); - result = 0x00400000; // sf=0, type=01 - break; + instrDesc* id = emitNewInstr(attr); - case INS_OPTS_8BYTE_TO_S: // INT64 to Single - assert(fmt == IF_DV_2I); - result = 0x80000000; // sf=1, type=00 - break; + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); - case INS_OPTS_8BYTE_TO_D: // INT64 to Double - assert(fmt == IF_DV_2I); - result = 0x80400000; // sf=1, type=01 - break; + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idReg4(reg4); - default: - assert(!"Invalid 'conversion' value"); - break; - } - return result; + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to have the Rn register be updated Pre/Post indexed - * or not updated + * Add an instruction referencing four registers and a constant. 
*/ -/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt) +void emitter::emitIns_R_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + ssize_t imm, + insOpts opt /* = INS_OPT_NONE*/) { - assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt)); - - if (emitter::insOptsIndexed(opt)) - { - if (emitter::insOptsPostIndex(opt)) - { - return 0x00000400; // set the bit at location 10 - } - else - { - assert(emitter::insOptsPreIndex(opt)); - return 0x00000C00; // set the bit at location 10 and 11 - } - } - else - { - assert(emitter::insOptsNone(opt)); - return 0; // bits 10 and 11 are zero - } + // Currently, only SVE instructions use this format. + emitInsSve_R_R_R_R_I(ins, attr, reg1, reg2, reg3, reg4, imm, opt); } /***************************************************************************** * - * Returns the encoding for a ldp/stp instruction to have the Rn register - * be updated Pre/Post indexed or not updated + * Add an instruction referencing a register and a condition code */ -/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt) +void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond) { - assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt)); + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; - if ((ins == INS_ldnp) || (ins == INS_stnp)) - { - assert(emitter::insOptsNone(opt)); - return 0; // bits 23 and 24 are zero - } - else + /* Figure out the encoding format of the instruction */ + switch (ins) { - if (emitter::insOptsIndexed(opt)) - { - if (emitter::insOptsPostIndex(opt)) - { - return 0x00800000; // set the bit at location 23 - } - else - { - assert(emitter::insOptsPreIndex(opt)); - return 0x01800000; // set the bit at location 24 and 23 - } - } - else - { - assert(emitter::insOptsNone(opt)); - return 0x01000000; // set the bit at location 24 - } - } + case INS_cset: + case 
INS_csetm: + assert(isGeneralRegister(reg)); + cfi.cond = cond; + fmt = IF_DR_1D; + break; + + default: + unreached(); + break; + + } // end switch (ins) + + assert(fmt != IF_NONE); + assert(isValidImmCond(cfi.immCFVal)); + + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to apply a Shift Type on the Rm register + * Add an instruction referencing two registers and a condition code */ -/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt) +void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond) { - if (emitter::insOptsNone(opt)) + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; + + /* Figure out the encoding format of the instruction */ + switch (ins) { - // None implies the we encode LSL (with a zero immediate) - opt = INS_OPTS_LSL; - } - assert(emitter::insOptsAnyShift(opt)); + case INS_cinc: + case INS_cinv: + case INS_cneg: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + cfi.cond = cond; + fmt = IF_DR_2D; + break; + default: + unreached(); + break; - emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL; - assert(option <= 3); + } // end switch (ins) - return option << 22; // bits 23, 22 + assert(fmt != IF_NONE); + assert(isValidImmCond(cfi.immCFVal)); + + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to apply a 12 bit left shift to the immediate + * Add an instruction referencing two registers and a condition code */ 
-/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt) +void emitter::emitIns_R_R_R_COND( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond) { - if (emitter::insOptsLSL12(opt)) + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; + + /* Figure out the encoding format of the instruction */ + switch (ins) { - return 0x00400000; // set the bit at location 22 - } - return 0; + case INS_csel: + case INS_csinc: + case INS_csinv: + case INS_csneg: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegisterOrZR(reg3)); + cfi.cond = cond; + fmt = IF_DR_3D; + break; + + default: + unreached(); + break; + + } // end switch (ins) + + assert(fmt != IF_NONE); + assert(isValidImmCond(cfi.immCFVal)); + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idSmallCns(cfi.immCFVal); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to have the Rm register use an extend operation + * Add an instruction referencing two registers the flags and a condition code */ -/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt) +void emitter::emitIns_R_R_FLAGS_COND( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond) { - if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL)) + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; + + /* Figure out the encoding format of the instruction */ + switch (ins) { - // None or LSL implies the we encode UXTX - opt = INS_OPTS_UXTX; - } - assert(emitter::insOptsAnyExtend(opt)); + case INS_ccmp: + case INS_ccmn: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + cfi.flags = flags; + cfi.cond = cond; + fmt = IF_DR_2I; + break; + default: + 
unreached(); + break; + } // end switch (ins) - emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB; - assert(option <= 7); + assert(fmt != IF_NONE); + assert(isValidImmCondFlags(cfi.immCFVal)); - return option << 13; // bits 15,14,13 -} + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); -/***************************************************************************** - * - * Returns the encoding to scale the Rm register by {0,1,2,3,4} - * when using an extend operation - */ + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); -/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm) -{ - assert((imm >= 0) && (imm <= 4)); + id->idReg1(reg1); + id->idReg2(reg2); - return (emitter::code_t)imm << 10; // bits 12,11,10 + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding to have the Rm register be auto scaled by the ld/st size + * Add an instruction referencing a register, an immediate, the flags and a condition code */ -/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled) +void emitter::emitIns_R_I_FLAGS_COND( + instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insCflags flags, insCond cond) { - if (isScaled) - { - return 0x00001000; // set the bit at location 12 - } - else + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; + + /* Figure out the encoding format of the instruction */ + switch (ins) { - return 0; - } + case INS_ccmp: + case INS_ccmn: + assert(isGeneralRegister(reg)); + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + if (isValidUimm<5>(imm)) + { + cfi.imm5 = imm; + cfi.flags = flags; + cfi.cond = cond; + fmt = IF_DI_1F; + } + else + { + assert(!"Instruction cannot be encoded: ccmp/ccmn imm5"); + } + break; + default: + unreached(); + break; + } // end switch (ins) + + assert(fmt != IF_NONE); + assert(isValidImmCondFlagsImm5(cfi.immCFVal)); + + 
instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg); + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low. + * Add a memory barrier instruction with a 'barrier' immediate */ -/*static*/ emitter::code_t emitter::insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm) +void emitter::emitIns_BARR(instruction ins, insBarrier barrier) { - assert(isValidSimm<9>(imm)); + insFormat fmt = IF_NONE; + ssize_t imm = 0; - if (imm < 0) + /* Figure out the encoding format of the instruction */ + switch (ins) { - imm = (imm & 0x1FF); - } + case INS_dsb: + case INS_dmb: + case INS_isb: - code_t h = (code_t)(imm & 0x1F8) << 13; // encode high 6-bits at locations '21-16' - code_t l = (code_t)((imm & ~0x1F8) & 0x7) << 10; // encode low 3-bits at locations '12-10' + fmt = IF_SI_0B; + imm = (ssize_t)barrier; + break; + default: + unreached(); + break; + } // end switch (ins) + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSC(EA_8BYTE, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); - return (h | l); + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Returns the encoding for the immediate value as 3-bits at bit locations '23-22' for high and '12' for low. + * Add an instruction with a static data member operand. If 'size' is 0, the + * instruction operates on the address of the static member instead of its + * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). 
*/ -/*static*/ emitter::code_t emitter::insEncodeUimm3h3l_23_to_22_and_12(ssize_t imm) +void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) { - assert(isValidUimm<3>(imm)); - - code_t h = (code_t)(imm & 0x6) << 21; // encode high 2-bits at locations '23-22' - code_t l = (code_t)(imm & 0x1) << 12; // encode low 1-bit at locations '12' - - return (h | l); + NYI("emitIns_C"); } /***************************************************************************** * - * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. + * Add an instruction referencing stack-based local variable. */ -/*static*/ emitter::code_t emitter::insEncodeImm8_12_to_5(ssize_t imm) -{ - assert(isValidSimm<8>(imm) || isValidUimm<8>(imm)); - return (code_t)((imm & 0xFF) << 5); -} - -BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) +void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) { - instruction ins = id->idIns(); - insFormat fmt = id->idInsFmt(); - regNumber dstReg = id->idReg1(); - if (id->idjShort) - { - // adr x, [rel addr] -- compute address: current addr(ip) + rel addr. 
- assert(ins == INS_adr); - assert(fmt == IF_DI_1E); - ssize_t distVal = (ssize_t)(dstAddr - srcAddr); - dst = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg); - } - else - { - // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr - assert(fmt == IF_LARGEADR); - ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr); - dst = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg); - - // add x, x, page offs -- compute address = page addr + page offs - ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits - assert(isValidUimm<12>(imm12)); - code_t code = - emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh) - code |= insEncodeDatasize(EA_8BYTE); // X - code |= ((code_t)imm12 << 10); // iiiiiiiiiiii - code |= insEncodeReg_Rd(dstReg); // ddddd - code |= insEncodeReg_Rn(dstReg); // nnnnn - dst += emitOutput_Instr(dst, code); - } - return dst; + NYI("emitIns_S"); } /***************************************************************************** * - * Output a local jump or other instruction with a pc-relative immediate. - * Note that this may be invoked to overwrite an existing jump instruction at 'dst' - * to handle forward branch patching. + * Add an instruction referencing a register and a stack-based local variable. */ - -BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - instrDescJmp* id = (instrDescJmp*)i; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + unsigned scale = 0; + bool isLdrStr = false; + bool isSimple = true; + bool useRegForImm = false; - unsigned srcOffs; - unsigned dstOffs; - BYTE* srcAddr; - BYTE* dstAddr; - ssize_t distVal; + assert(offs >= 0); - // Set default ins/fmt from id. 
- instruction ins = id->idIns(); - insFormat fmt = id->idInsFmt(); + /* Figure out the variable's frame position */ + bool FPbased; + int base = emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm = disp; - bool loadLabel = false; - bool isJump = false; - bool loadConstant = false; + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + // TODO-ARM64-CQ: use unscaled loads? + /* Figure out the encoding format of the instruction */ switch (ins) { - default: - isJump = true; + case INS_strb: + case INS_ldrb: + case INS_ldrsb: + scale = 0; break; - case INS_tbz: - case INS_tbnz: - case INS_cbz: - case INS_cbnz: - isJump = true; + case INS_strh: + case INS_ldrh: + case INS_ldrsh: + scale = 1; break; - case INS_ldr: case INS_ldrsw: - loadConstant = true; + scale = 2; break; - case INS_adr: - case INS_adrp: - loadLabel = true; + case INS_str: + case INS_ldr: + assert(isValidGeneralDatasize(size) || isValidVectorDatasize(size)); + scale = genLog2(EA_SIZE_IN_BYTES(size)); + isLdrStr = true; break; - } - - /* Figure out the distance to the target */ - - srcOffs = emitCurCodeOffs(dst); - srcAddr = emitOffsetToPtr(srcOffs); - - if (id->idAddr()->iiaIsJitDataOffset()) - { - assert(loadConstant || loadLabel); - int doff = id->idAddr()->iiaGetJitDataOffset(); - assert(doff >= 0); - ssize_t imm = emitGetInsSC(id); - assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0 - - unsigned dataOffs = (unsigned)(doff + imm); - assert(dataOffs < emitDataSize()); - dstAddr = emitDataOffsetToPtr(dataOffs); - regNumber dstReg = id->idReg1(); - regNumber addrReg = dstReg; // an integer register to compute long address. - emitAttr opSize = id->idOpSize(); + case INS_lea: + assert(size == EA_8BYTE); + isSimple = false; + scale = 0; - if (loadConstant) - { - if (id->idjShort) + if (disp >= 0) { - // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr. 
- assert(ins == INS_ldr); - assert(fmt == IF_LS_1A); - distVal = (ssize_t)(dstAddr - srcAddr); - dst = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize); + ins = INS_add; } else { - // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr - assert(fmt == IF_LARGELDC); - ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr); - if (isVectorRegister(dstReg)) - { - // Update addrReg with the reserved integer register - // since we cannot use dstReg (vector) to load constant directly from memory. - - // If loading a 16-byte value, we will need to load directly into dstReg. - // Thus, encode addrReg for the ld1 instruction. - if (opSize == EA_16BYTE) - { - addrReg = encodingSPtoZR(id->idReg2()); - } - else - { - addrReg = id->idReg2(); - } - - assert(isGeneralRegister(addrReg)); - } - - ins = INS_adrp; - fmt = IF_DI_1E; - dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg); - - ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits - assert(isValidUimm<12>(imm12)); - - // Special case: emit add + ld1 instructions for loading 16-byte data into vector register. - if (isVectorRegister(dstReg) && (opSize == EA_16BYTE)) - { - const emitAttr elemSize = EA_1BYTE; - const insOpts opt = optMakeArrangement(opSize, elemSize); + ins = INS_sub; + imm = -disp; + } - assert(isGeneralRegisterOrSP(addrReg)); - assert(isValidVectorElemsize(elemSize)); - assert(isValidArrangement(opSize, opt)); + if (imm <= 0x0fff) + { + fmt = IF_DI_2A; // add reg1,reg2,#disp + } + else + { + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + fmt = IF_DR_3A; // add reg1,reg2,rsvdReg + } + break; - // Calculate page addr + page offs, then emit ld1 instruction. - dst = emitOutputVectorConstant(dst, imm12, dstReg, addrReg, opSize, elemSize); - } - else - { - // ldr x, [x, 0] -- load constant from address into integer register. 
- ins = INS_ldr; - fmt = IF_LS_2B; - dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize); + case INS_sve_ldr: + { + assert(isVectorRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_IE_2A; - // fmov v, d -- copy constant in integer register to vector register. - // This is needed only for vector constant. - if (addrReg != dstReg) - { - // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn - // (scalar, from general) - assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg)); - ins = INS_fmov; - fmt = IF_DV_2I; - code_t code = emitInsCode(ins, fmt); + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - code |= insEncodeReg_Vd(dstReg); // ddddd - code |= insEncodeReg_Rn(addrReg); // nnnnn - if (id->idOpSize() == EA_8BYTE) - { - code |= 0x80400000; // X ... X - } - dst += emitOutput_Instr(dst, code); - } - } + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); } } - else - { - assert(loadLabel); - dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id); - } + break; - return dst; - } + // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct + case INS_sve_ldr_mask: + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_ID_2A; + ins = INS_sve_ldr; - assert(loadLabel || isJump); + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (id->idAddr()->iiaHasInstrCount()) - { - assert(ig != NULL); - int 
instrCount = id->idAddr()->iiaGetInstrCount(); - unsigned insNum = emitFindInsNum(ig, id); - if (instrCount < 0) - { - // Backward branches using instruction count must be within the same instruction group. - assert(insNum + 1 >= (unsigned)(-instrCount)); + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } } + break; - dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount)); - dstAddr = emitOffsetToPtr(dstOffs); - } - else - { - dstOffs = id->idAddr()->iiaIGlabel->igOffs; - dstAddr = emitOffsetToPtr(dstOffs); - } + default: + NYI("emitIns_R_S"); // FP locals? + return; - distVal = (ssize_t)(dstAddr - srcAddr); + } // end switch (ins) - if (dstOffs <= srcOffs) + assert((scale >= 0) && (scale <= 4)); + + if (isSimple) { -#if DEBUG_EMIT - /* This is a backward jump - distance is known at this point */ + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + if (imm == 0) { - size_t blkOffs = id->idjIG->igOffs; - - if (INTERESTING_JUMP_NUM == 0) - printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum); - printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj); - printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj); - printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj); + fmt = IF_LS_2A; } -#endif - } - else - { - /* This is a forward jump - distance will be an upper limit */ - - emitFwdJumps = true; - - /* The target offset will be closer by at least 'emitOffsAdj', but only if this - jump doesn't cross the hot-cold boundary. 
*/ - - if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) + else if ((imm < 0) || ((imm & mask) != 0)) { - dstOffs -= emitOffsAdj; - distVal -= emitOffsAdj; + if (isValidSimm<9>(imm)) + { + fmt = IF_LS_2C; + } + else + { + useRegForImm = true; + } } - - /* Record the location of the jump for later patching */ - - id->idjOffs = dstOffs; - - /* Are we overflowing the id->idjOffs bitfield? */ - if (id->idjOffs != dstOffs) - IMPL_LIMITATION("Method is too large"); - -#if DEBUG_EMIT - if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + else if (imm > 0) { - size_t blkOffs = id->idjIG->igOffs; + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st - if (INTERESTING_JUMP_NUM == 0) - printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum); - printf("[4] Jump block is at %08X\n", blkOffs); - printf("[4] Jump is at %08X\n", srcOffs); - printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs); + fmt = IF_LS_2B; + } + else + { + useRegForImm = true; + } } -#endif - } - -#ifdef DEBUG - if (0 && emitComp->verbose) - { - size_t sz = 4; - int distValSize = id->idjShort ? 4 : 8; - printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = 0x%08X\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd", - dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal); - } -#endif - - /* For forward jumps, record the address of the distance value */ - id->idjTemp.idjAddr = (distVal > 0) ? 
dst : NULL; - assert(insOptsNone(id->idInsOpt())); - - if (isJump) - { - if (id->idjShort) + if (useRegForImm) { - // Short conditional/unconditional jump - assert(!id->idjKeepLong); - assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); - assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B) || (fmt == IF_BI_1A) || (fmt == IF_BI_1B)); - dst = emitOutputShortBranch(dst, ins, fmt, distVal, id); + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + fmt = IF_LS_3A; } - else - { - // Long conditional/unconditional jump + } - if (fmt == IF_LARGEJMP) - { - // This is a pseudo-instruction format representing a large conditional branch, to allow - // us to get a greater branch target range than we can get by using a straightforward conditional - // branch. It is encoded as a short conditional branch that branches around a long unconditional - // branch. - // - // Conceptually, we have: - // - // b L_target - // - // The code we emit is: - // - // b L_not // 4 bytes. Note that we reverse the condition. - // b L_target // 4 bytes - // L_not: - // - // Note that we don't actually insert any blocks: we simply encode "b L_not" as a branch with - // the correct offset. Note also that this works for both integer and floating-point conditions, because - // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example, - // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered). + assert(fmt != IF_NONE); - instruction reverseIns; - insFormat reverseFmt; + // Try to optimize a load/store with an alternative instruction. 
+ if (isLdrStr && emitComp->opts.OptimizationEnabled() && + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) + { + return; + } - switch (ins) - { - case INS_cbz: - reverseIns = INS_cbnz; - reverseFmt = IF_BI_1A; - break; - case INS_cbnz: - reverseIns = INS_cbz; - reverseFmt = IF_BI_1A; - break; - case INS_tbz: - reverseIns = INS_tbnz; - reverseFmt = IF_BI_1B; - break; - case INS_tbnz: - reverseIns = INS_tbz; - reverseFmt = IF_BI_1B; - break; - default: - reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); - reverseFmt = IF_BI_0B; - } + instrDesc* id = emitNewInstrCns(attr, imm); - dst = emitOutputShortBranch(dst, - reverseIns, // reverse the conditional instruction - reverseFmt, 8, /* 8 bytes from start of this large conditional - pseudo-instruction to L_not. */ - id); + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); - // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that. - ins = INS_b; - fmt = IF_BI_0A; + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); - // The distVal was computed based on the beginning of the pseudo-instruction, - // So subtract the size of the conditional branch so that it is relative to the - // unconditional branch. - distVal -= 4; - } +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif - assert(fmt == IF_BI_0A); - assert((distVal & 1) == 0); - code_t code = emitInsCode(ins, fmt); - const bool doRecordRelocation = emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs); + dispIns(id); + appendToCurIG(id); +} - if (doRecordRelocation) - { - // dst isn't an actual final target location, just some intermediate - // location. Thus we cannot make any guarantees about distVal (not - // even the direction/sign). 
Instead we don't encode any offset and - // rely on the relocation to do all the work - } - else - { - // Branch offset encodings are scaled by 4. - noway_assert((distVal & 3) == 0); - distVal >>= 2; - noway_assert(isValidSimm<26>(distVal)); +/***************************************************************************** + * + * Add an instruction referencing two register and consecutive stack-based local variable slots. + */ +void emitter::emitIns_R_R_S_S( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) +{ + assert((ins == INS_ldp) || (ins == INS_ldnp)); + assert(EA_8BYTE == EA_SIZE(attr1)); + assert(EA_8BYTE == EA_SIZE(attr2)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(offs >= 0); - // Insert offset into unconditional branch instruction - distVal &= 0x3FFFFFFLL; - code |= distVal; - } + insFormat fmt = IF_LS_3B; + int disp = 0; + const unsigned scale = 3; - const unsigned instrSize = emitOutput_Instr(dst, code); + /* Figure out the variable's frame position */ + int base; + bool FPbased; - if (doRecordRelocation) + base = emitComp->lvaFrameAddress(varx, &FPbased); + disp = base + offs; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg3 = FPbased ? 
REG_FPBASE : REG_SPBASE; + reg3 = encodingSPtoZR(reg3); + + bool useRegForAdr = true; + ssize_t imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) + { + useRegForAdr = false; + } + else + { + if ((imm & mask) == 0) + { + ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st + + if ((immShift >= -64) && (immShift <= 63)) { - assert(id->idjKeepLong); - if (emitComp->info.compMatchedVM) - { - void* target = emitOffsetToPtr(dstOffs); - emitRecordRelocation((void*)dst, target, IMAGE_REL_ARM64_BRANCH26); - } + fmt = IF_LS_3C; + useRegForAdr = false; + imm = immShift; } - - dst += instrSize; } } - else if (loadLabel) + + if (useRegForAdr) { - dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id); + regNumber rsvd = codeGen->rsGetRsvdReg(); + emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm); + reg3 = rsvd; + imm = 0; } - return dst; -} + assert(fmt != IF_NONE); -/***************************************************************************** -* -* Output a short branch instruction. -*/ -BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id) -{ - code_t code = emitInsCode(ins, fmt); + instrDesc* id = emitNewInstrCns(attr1, imm); - ssize_t loBits = (distVal & 3); - noway_assert(loBits == 0); - distVal >>= 2; // branch offset encodings are scaled by 4. + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); - if (fmt == IF_BI_0A) - { - // INS_b or INS_bl_local - noway_assert(isValidSimm<26>(distVal)); - distVal &= 0x3FFFFFFLL; - code |= distVal; - } - else if (fmt == IF_BI_0B) // BI_0B 01010100iiiiiiii iiiiiiiiiiiXXXXX simm19:00 - { - // INS_beq, INS_bne, etc... 
- noway_assert(isValidSimm<19>(distVal)); - distVal &= 0x7FFFFLL; - code |= distVal << 5; - } - else if (fmt == IF_BI_1A) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00 + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) { - // INS_cbz or INS_cbnz - assert(id != nullptr); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - - noway_assert(isValidSimm<19>(distVal)); - distVal &= 0x7FFFFLL; // 19 bits - code |= distVal << 5; + id->idGCrefReg2(GCT_GCREF); } - else if (fmt == IF_BI_1B) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 + else if (EA_IS_BYREF(attr2)) { - // INS_tbz or INS_tbnz - assert(id != nullptr); - ssize_t imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - - if (imm & 0x20) // test bit 32-63 ? - { - code |= 0x80000000; // B - } - code |= ((imm & 0x1F) << 19); // bbbbb - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - - noway_assert(isValidSimm<14>(distVal)); - distVal &= 0x3FFFLL; // 14 bits - code |= distVal << 5; + id->idGCrefReg2(GCT_BYREF); } else { - assert(!"Unknown fmt for emitOutputShortBranch"); + id->idGCrefReg2(GCT_NONE); } - dst += emitOutput_Instr(dst, code); + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); - return dst; +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** -* -* Output a short address instruction. 
-*/ -BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg) + * + * Add an instruction referencing a stack-based local variable and a register + */ +void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) { - ssize_t loBits = (distVal & 3); - distVal >>= 2; + assert(offs >= 0); + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + unsigned scale = 0; + bool isVectorStore = false; + bool isStr = false; + bool isSimple = true; + bool useRegForImm = false; - code_t code = emitInsCode(ins, fmt); - if (fmt == IF_DI_1E) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 + /* Figure out the variable's frame position */ + bool FPbased; + int base = emitComp->lvaFrameAddress(varx, &FPbased); + int disp = base + offs; + ssize_t imm = disp; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg2 = encodingSPtoZR(FPbased ? REG_FPBASE : REG_SPBASE); + + // TODO-ARM64-CQ: use unscaled loads? + /* Figure out the encoding format of the instruction */ + switch (ins) { - // INS_adr or INS_adrp - code |= insEncodeReg_Rd(reg); // ddddd + case INS_strb: + scale = 0; + assert(isGeneralRegisterOrZR(reg1)); + break; - noway_assert(isValidSimm<19>(distVal)); - distVal &= 0x7FFFFLL; // 19 bits - code |= distVal << 5; - code |= loBits << 29; // 2 bits + case INS_strh: + scale = 1; + assert(isGeneralRegisterOrZR(reg1)); + break; + + case INS_str: + if (isGeneralRegisterOrZR(reg1)) + { + assert(isValidGeneralDatasize(size)); + scale = (size == EA_8BYTE) ? 
3 : 2; + } + else + { + assert(isVectorRegister(reg1)); + assert(isValidVectorLSDatasize(size)); + scale = NaturalScale_helper(size); + isVectorStore = true; + } + isStr = true; + break; + + case INS_sve_str: + { + assert(isVectorRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JH_2A; + + // TODO-SVE: Don't assume 128bit vectors + scale = NaturalScale_helper(EA_16BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; + + // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct + case INS_sve_str_mask: + { + assert(isPredicateRegister(reg1)); + isSimple = false; + size = EA_SCALABLE; + attr = size; + fmt = IF_SVE_JG_2A; + ins = INS_sve_str; + + // TODO-SVE: Don't assume 128bit vectors + // Predicate size is vector length / 8 + scale = NaturalScale_helper(EA_2BYTE); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + + if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) + { + imm >>= scale; // The immediate is scaled by the size of the ld/st + } + else + { + useRegForImm = true; + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + } + } + break; + + default: + NYI("emitIns_S_R"); // FP locals? + return; + + } // end switch (ins) + + if (isVectorStore || !isSimple) + { + assert(scale <= 4); } else { - assert(!"Unknown fmt for emitOutputShortAddress"); + assert(scale <= 3); } - dst += emitOutput_Instr(dst, code); - - return dst; -} - -/***************************************************************************** -* -* Output a short constant instruction. 
-*/ -BYTE* emitter::emitOutputShortConstant( - BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize) -{ - code_t code = emitInsCode(ins, fmt); - - if (fmt == IF_LS_1A) + if (isSimple) { - // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt simm21 - // INS_ldr or INS_ldrsw (PC-Relative) - - ssize_t loBits = (imm & 3); - noway_assert(loBits == 0); - ssize_t distVal = imm >> 2; // load offset encodings are scaled by 4. - - noway_assert(isValidSimm<19>(distVal)); + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - // Is the target a vector register? - if (isVectorRegister(reg)) + if (imm == 0) { - code |= insEncodeDatasizeVLS(code, opSize); // XX V - code |= insEncodeReg_Vt(reg); // ttttt + fmt = IF_LS_2A; } - else + else if ((imm < 0) || ((imm & mask) != 0)) { - assert(isGeneralRegister(reg)); - // insEncodeDatasizeLS is not quite right for this case. - // So just specialize it. - if ((ins == INS_ldr) && (opSize == EA_8BYTE)) + if (isValidSimm<9>(imm)) { - // set the operation size in bit 30 - code |= 0x40000000; + fmt = IF_LS_2C; + } + else + { + useRegForImm = true; } - - code |= insEncodeReg_Rt(reg); // ttttt } - - distVal &= 0x7FFFFLL; // 19 bits - code |= distVal << 5; - } - else if (fmt == IF_LS_2B) - { - // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3}) - // INS_ldr or INS_ldrsw (PC-Relative) - noway_assert(isValidUimm<12>(imm)); - assert(isGeneralRegister(reg)); - - if (opSize == EA_8BYTE) + else if (imm > 0) { - // insEncodeDatasizeLS is not quite right for this case. - // So just specialize it. - if (ins == INS_ldr) + if (((imm & mask) == 0) && ((imm >> scale) < 0x1000)) { - // set the operation size in bit 30 - code |= 0x40000000; + imm >>= scale; // The immediate is scaled by the size of the ld/st + fmt = IF_LS_2B; + } + else + { + useRegForImm = true; } - // Low 3 bits should be 0 -- 8 byte JIT data should be aligned on 8 byte. 
- assert((imm & 7) == 0); - imm >>= 3; } - else + + if (useRegForImm) { - assert(opSize == EA_4BYTE); - // Low 2 bits should be 0 -- 4 byte aligned data. - assert((imm & 3) == 0); - imm >>= 2; + // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar. + // It is instead implicit when idSetIsLclVar() is set, with this encoding format. + regNumber rsvdReg = codeGen->rsGetRsvdReg(); + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); + fmt = IF_LS_3A; } - - code |= insEncodeReg_Rt(reg); // ttttt - code |= insEncodeReg_Rn(reg); // nnnnn - code |= imm << 10; } - else + + assert(fmt != IF_NONE); + + // Try to optimize a store with an alternative instruction. + if (isStr && emitComp->opts.OptimizationEnabled() && + OptimizeLdrStr(ins, attr, reg1, reg2, imm, size, fmt, true, varx, offs DEBUG_ARG(useRegForImm))) { - assert(!"Unknown fmt for emitOutputShortConstant"); + return; } - dst += emitOutput_Instr(dst, code); + instrDesc* id = emitNewInstrCns(attr, imm); - return dst; -} + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); -/***************************************************************************** - * - * Output instructions to load a constant into a vector register. - */ -BYTE* emitter::emitOutputVectorConstant( - BYTE* dst, ssize_t imm, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize) -{ - // add addrReg, addrReg, page offs -- compute address = page addr + page offs. 
- code_t code = emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh) - code |= insEncodeDatasize(EA_8BYTE); // X - use EA_8BYTE, as we are calculating 64-bit address - code |= ((code_t)imm << 10); // iiiiiiiiiiii - code |= insEncodeReg_Rd(addrReg); // ddddd - code |= insEncodeReg_Rn(addrReg); // nnnnn - dst += emitOutput_Instr(dst, code); + id->idReg1(reg1); + id->idReg2(reg2); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); - // ld1 dstReg, addrReg -- load constant at address in addrReg into dstReg. - code = emitInsCode(INS_ld1, IF_LS_2D); // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn - code |= insEncodeVectorsize(opSize); // Q - code |= insEncodeVLSElemsize(elemSize); // ss - code |= insEncodeReg_Rn(addrReg); // nnnnn - code |= insEncodeReg_Vt(dstReg); // ttttt - dst += emitOutput_Instr(dst, code); +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif - return dst; + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Output a call instruction. + * Add an instruction referencing consecutive stack-based local variable slots and two registers */ - -unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) +void emitter::emitIns_S_S_R_R( + instruction ins, emitAttr attr1, emitAttr attr2, regNumber reg1, regNumber reg2, int varx, int offs) { - const unsigned char callInstrSize = sizeof(code_t); // 4 bytes - regMaskTP gcrefRegs; - regMaskTP byrefRegs; + assert((ins == INS_stp) || (ins == INS_stnp)); + assert(EA_8BYTE == EA_SIZE(attr1)); + assert(EA_8BYTE == EA_SIZE(attr2)); + assert(isGeneralRegisterOrZR(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + assert(offs >= 0); - VARSET_TP GCvars(VarSetOps::UninitVal()); + insFormat fmt = IF_LS_3B; + int disp = 0; + const unsigned scale = 3; - // Is this a "fat" call descriptor? 
- if (id->idIsLargeCall()) + /* Figure out the variable's frame position */ + int base; + bool FPbased; + + base = emitComp->lvaFrameAddress(varx, &FPbased); + disp = base + offs; + + // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead? + regNumber reg3 = FPbased ? REG_FPBASE : REG_SPBASE; + + bool useRegForAdr = true; + ssize_t imm = disp; + ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate + if (imm == 0) { - instrDescCGCA* idCall = (instrDescCGCA*)id; - gcrefRegs = idCall->idcGcrefRegs; - byrefRegs = idCall->idcByrefRegs; - VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); + useRegForAdr = false; } else { - assert(!id->idIsLargeDsp()); - assert(!id->idIsLargeCns()); + if ((imm & mask) == 0) + { + ssize_t immShift = imm >> scale; // The immediate is scaled by the size of the ld/st - gcrefRegs = emitDecodeCallGCregs(id); - byrefRegs = 0; - VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); + if ((immShift >= -64) && (immShift <= 63)) + { + fmt = IF_LS_3C; + useRegForAdr = false; + imm = immShift; + } + } } - /* We update the GC info before the call as the variables cannot be - used by the call. Killing variables before the call helps with - boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. - If we ever track aliased variables (which could be used by the - call), we would have to keep them alive past the call. */ - - emitUpdateLiveGCvars(GCvars, dst); - -#ifdef DEBUG - // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. 
- if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + if (useRegForAdr) { - emitDispGCVarDelta(); + regNumber rsvd = codeGen->rsGetRsvdReg(); + emitIns_R_R_Imm(INS_add, EA_PTRSIZE, rsvd, reg3, imm); + reg3 = rsvd; + imm = 0; } -#endif // DEBUG - // Now output the call instruction and update the 'dst' pointer - // - unsigned outputInstrSize = emitOutput_Instr(dst, code); - dst += outputInstrSize; + assert(fmt != IF_NONE); - // All call instructions are 4-byte in size on ARM64 - // - assert(outputInstrSize == callInstrSize); + instrDesc* id = emitNewInstrCns(attr1, imm); - // If the method returns a GC ref, mark INTRET (R0) appropriately. - if (id->idGCref() == GCT_GCREF) - { - gcrefRegs |= RBM_INTRET; - } - else if (id->idGCref() == GCT_BYREF) - { - byrefRegs |= RBM_INTRET; - } + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); - // If is a multi-register return method is called, mark INTRET_1 (X1) appropriately - if (id->idIsLargeCall()) + // Record the attribute for the second register in the pair + if (EA_IS_GCREF(attr2)) { - instrDescCGCA* idCall = (instrDescCGCA*)id; - if (idCall->idSecondGCref() == GCT_GCREF) - { - gcrefRegs |= RBM_INTRET_1; - } - else if (idCall->idSecondGCref() == GCT_BYREF) - { - byrefRegs |= RBM_INTRET_1; - } + id->idGCrefReg2(GCT_GCREF); } - - // If the GC register set has changed, report the new set. - if (gcrefRegs != emitThisGCrefRegs) + else if (EA_IS_BYREF(attr2)) { - emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + id->idGCrefReg2(GCT_BYREF); } - // If the Byref register set has changed, report the new set. - if (byrefRegs != emitThisByrefRegs) + else { - emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + id->idGCrefReg2(GCT_NONE); } - // Some helper calls may be marked as not requiring GC info to be recorded. - if ((!id->idIsNoGC())) - { - // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args. 
- // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism - // to record the call for GC info purposes. (It might be best to use an alternate call, - // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.) - emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0); + reg3 = encodingSPtoZR(reg3); - // Do we need to record a call location for GC purposes? - // - if (!emitFullGCinfo) - { - emitRecordGCcall(dst, callInstrSize); - } - } - return callInstrSize; + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + id->idSetIsLclVar(); + +#ifdef DEBUG + id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; +#endif + + dispIns(id); + appendToCurIG(id); } /***************************************************************************** * - * Emit a 32-bit Arm64 instruction + * Add an instruction referencing stack-based local variable and an immediate */ - -unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val) { - assert(sizeof(code_t) == 4); - BYTE* dstRW = dst + writeableOffset; - *((code_t*)dstRW) = code; - - return sizeof(code_t); + NYI("emitIns_S_I"); } /***************************************************************************** -* - * Append the machine code corresponding to the given instruction descriptor - * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' - * is the instruction group that contains the instruction. Updates '*dp' to - * point past the generated code, and returns the size of the instruction - * descriptor in bytes. + * + * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * No relocation is needed. PC-relative offset will be encoded directly into instruction. 
+ * */ - -size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs) { - BYTE* dst = *dp; - BYTE* odst = dst; - code_t code = 0; - size_t sz = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why? - instruction ins = id->idIns(); - insFormat fmt = id->idInsFmt(); - emitAttr size = id->idOpSize(); - -#ifdef DEBUG -#if DUMP_GC_TABLES - bool dspOffs = emitComp->opts.dspGCtbls; -#else - bool dspOffs = !emitComp->opts.disDiffable; -#endif -#endif // DEBUG - - assert(REG_NA == (int)REG_NA); + assert(offs >= 0); + assert(instrDesc::fitsInSmallCns(offs)); - /* What instruction format have we got? */ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + instrDescJmp* id = emitNewInstrJmp(); - switch (fmt) + switch (ins) { - ssize_t imm; - ssize_t index; - ssize_t index2; - unsigned cmode; - unsigned immShift; - emitAttr elemsize; - emitAttr datasize; - - case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 - case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 - case IF_LARGEJMP: - assert(id->idGCref() == GCT_NONE); - assert(id->idIsBound()); - dst = emitOutputLJ(ig, dst, id); - sz = sizeof(instrDescJmp); + case INS_adr: + // This is case to get address to the constant data. + fmt = IF_LARGEADR; + assert(isGeneralRegister(reg)); + assert(isValidGeneralDatasize(size)); break; - case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 - code = emitInsCode(ins, fmt); - sz = id->idIsLargeCall() ? 
sizeof(instrDescCGCA) : sizeof(instrDesc); - dst += emitOutputCall(ig, dst, id, code); - // Always call RecordRelocation so that we wire in a JumpStub when we don't reach - emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26); + case INS_ldr: + fmt = IF_LARGELDC; + if (isVectorRegister(reg)) + { + assert(isValidVectorLSDatasize(size)); + // For vector (float/double) register, we should have an integer address reg to + // compute long address which consists of page address and page offset. + // For integer constant, this is not needed since the dest reg can be used to + // compute address as well as contain the final contents. + assert(isGeneralRegister(reg) || (addrReg != REG_NA)); + } + else + { + assert(isGeneralRegister(reg)); + assert(isValidGeneralDatasize(size)); + } break; - case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00 - assert(insOptsNone(id->idInsOpt())); - assert(id->idIsBound()); - - dst = emitOutputLJ(ig, dst, id); - sz = sizeof(instrDescJmp); - break; + default: + unreached(); + } - case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 - assert(insOptsNone(id->idInsOpt())); - assert(id->idIsBound()); + assert(fmt != IF_NONE); - dst = emitOutputLJ(ig, dst, id); - sz = sizeof(instrDescJmp); - break; + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + id->idSmallCns(offs); + id->idOpSize(size); + id->idAddr()->iiaFieldHnd = fldHnd; + id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are + // allocated together. - case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn - assert(insOptsNone(id->idInsOpt())); - assert((ins == INS_ret) || (ins == INS_br)); - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + id->idReg1(reg); // destination register that will get the constant value. 
+ if (addrReg != REG_NA) + { + id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long + // address) + } + id->idjShort = false; // Assume loading constant from long address - dst += emitOutput_Instr(dst, code); - break; + // Keep it long if it's in cold code. + id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB); - case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn - assert(insOptsNone(id->idInsOpt())); - assert((ins == INS_br_tail) || (ins == INS_blr)); - code = emitInsCode(ins, fmt); +#ifdef DEBUG + if (emitComp->opts.compLongAddress) + id->idjKeepLong = 1; +#endif // DEBUG - if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD()) - { - emitRecordRelocation(odst, (CORINFO_METHOD_HANDLE)id->idAddr()->iiaAddr, - IMAGE_REL_AARCH64_TLSDESC_CALL); - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - } - else - { - code |= insEncodeReg_Rn(id->idReg3()); // nnnnn - } - dst += emitOutputCall(ig, dst, id, code); - sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc); - break; + // If it's possible to be shortened, then put it in jump list + // to be revisited by emitJumpDistBind. + if (!id->idjKeepLong) + { + /* Record the jump's IG and offset within it */ + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; - case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) - case IF_LARGELDC: - assert(insOptsNone(id->idInsOpt())); - assert(id->idIsBound()); + /* Append this jump to this IG's jump list */ + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; - dst = emitOutputLJ(ig, dst, id); - sz = sizeof(instrDescJmp); - break; +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + } - case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn - assert(insOptsNone(id->idInsOpt())); - code = emitInsCode(ins, fmt); - // Is the target a vector register? 
- if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - } - else - { - code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - } - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - if (id->idIsTlsGD()) - { - emitRecordRelocation(odst, (void*)emitGetInsSC(id), IMAGE_REL_AARCH64_TLSDESC_LD64_LO12); - } - break; + dispIns(id); + appendToCurIG(id); +} - case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) - assert(insOptsNone(id->idInsOpt())); - imm = emitGetInsSC(id); - assert(isValidUimm<12>(imm)); - code = emitInsCode(ins, fmt); - // Is the target a vector register? - if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - } - else - { - code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - } - code |= ((code_t)imm << 10); // iiiiiiiiiiii - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +/***************************************************************************** + * + * Add an instruction with a static member + constant. + */ - case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc - assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); - imm = emitGetInsSC(id); - assert((imm >= -256) && (imm <= 255)); // signed 9 bits - imm &= 0x1ff; // force into unsigned 9 bit representation - code = emitInsCode(ins, fmt); - // Is the target a vector register? 
- if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - } - else - { - code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - } - code |= insEncodeIndexedOpt(id->idInsOpt()); // PP - code |= ((code_t)imm << 12); // iiiiiiiii - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val) +{ + NYI("emitIns_C_I"); +} - case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn - case IF_LS_2E: // LS_2E .Q.............. ....ssnnnnnttttt Vt Rn - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); +/***************************************************************************** + * + * Add an instruction with a static member + register operands. + */ - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeVLSElemsize(elemsize); // ss - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vt(id->idReg1()); // ttttt +void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs) +{ + assert(!"emitIns_C_R not supported for RyuJIT backend"); +} - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_R_AR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ + NYI("emitIns_R_AR"); +} - case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn - case IF_LS_2G: // LS_2G .Q.............. 
xx.Sssnnnnnttttt Vt[] Rn - elemsize = id->idOpSize(); - index = id->idSmallCns(); - code = emitInsCode(ins, fmt); +// This generates code to populate the access for TLS on linux +void emitter::emitIns_Adrp_Ldr_Add(emitAttr attr, + regNumber reg1, + regNumber reg2, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI)); + assert(TargetOS::IsUnix); + assert(EA_IS_RELOC(attr)); + assert(EA_IS_CNS_TLSGD_RELOC(attr)); - code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vt(id->idReg1()); // ttttt + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_DI_1E; + bool needAdd = false; + instrDescJmp* id = emitNewInstrJmp(); - dst += emitOutput_Instr(dst, code); - break; + // adrp + id->idIns(INS_adrp); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + id->idOpSize(size); + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idReg1(reg1); + id->idSetIsDspReloc(); + id->idSetTlsGD(); - case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} - assert(insOptsLSExtend(id->idInsOpt())); - code = emitInsCode(ins, fmt); - // Is the target a vector register? 
- if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - } - else - { - code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - } - code |= insEncodeExtend(id->idInsOpt()); // ooo - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - if (id->idIsLclVar()) - { - code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm - } - else - { - code |= insEncodeReg3Scale(id->idReg3Scaled()); // S - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - } - dst += emitOutput_Instr(dst, code); - break; +#ifdef DEBUG + id->idDebugOnlyInfo()->idMemCookie = targetHandle; + id->idDebugOnlyInfo()->idFlags = gtFlags; +#endif - case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn - assert(insOptsNone(id->idInsOpt())); - code = emitInsCode(ins, fmt); - // Is the target a vector register? - if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - code |= insEncodeReg_Va(id->idReg2()); // aaaaa - } - else - { - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - code |= insEncodeReg_Ra(id->idReg2()); // aaaaa - } - code |= insEncodeReg_Rn(id->idReg3()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + dispIns(id); + appendToCurIG(id); - case IF_LS_3C: // LS_3C X......PP.iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh) - assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); - imm = emitGetInsSC(id); - assert((imm >= -64) && (imm <= 63)); // signed 7 bits - imm &= 0x7f; // force into unsigned 7 bit representation - code = emitInsCode(ins, fmt); - // Is the target a vector register? 
- if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX - code |= insEncodeReg_Vt(id->idReg1()); // ttttt - code |= insEncodeReg_Va(id->idReg2()); // aaaaa - } - else - { - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - code |= insEncodeReg_Ra(id->idReg2()); // aaaaa - } - code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP - code |= ((code_t)imm << 15); // iiiiiiiii - code |= insEncodeReg_Rn(id->idReg3()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + // ldr + emitIns_R_R_I(INS_ldr, attr, reg2, reg1, (ssize_t)addr); - case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn - code = emitInsCode(ins, fmt); - // Arm64 store exclusive unpredictable cases - assert(id->idReg1() != id->idReg2()); - assert(id->idReg1() != id->idReg3()); - code |= insEncodeDatasizeLS(code, id->idOpSize()); // X - code |= insEncodeReg_Rm(id->idReg1()); // mmmmm - code |= insEncodeReg_Rt(id->idReg2()); // ttttt - code |= insEncodeReg_Rn(id->idReg3()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + // add + fmt = IF_DI_2A; + instrDesc* addId = emitNewInstr(attr); + assert(id->idIsReloc()); - case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics - code = emitInsCode(ins, fmt); - code |= insEncodeDatasizeLS(code, id->idOpSize()); // X - code |= insEncodeReg_Rm(id->idReg1()); // mmmmm - code |= insEncodeReg_Rt(id->idReg2()); // ttttt - code |= insEncodeReg_Rn(id->idReg3()); // nnnnn - dst += emitOutput_Instr(dst, code); + addId->idIns(INS_add); + addId->idInsFmt(fmt); + addId->idInsOpt(INS_OPTS_NONE); + addId->idOpSize(size); + addId->idAddr()->iiaAddr = (BYTE*)addr; + addId->idReg1(reg1); + addId->idReg2(reg1); + addId->idSetTlsGD(); - // Some instructions with this encoding return their result in the - // second operand register instead of the first so we special case - // the 
GC update here and skip the common path down below. - if (emitInsDestIsOp2(ins)) - { - if (id->idGCref() != GCT_NONE) - { - emitGCregLiveUpd(id->idGCref(), id->idReg2(), dst); - } - else - { - emitGCregDeadUpd(id->idReg2(), dst); - } + dispIns(addId); + appendToCurIG(addId); +} - goto SKIP_GC_UPDATE; - } +// This computes address from the immediate which is relocatable. +void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber ireg, + ssize_t addr DEBUGARG(size_t targetHandle) DEBUGARG(GenTreeFlags gtFlags)) +{ + assert(EA_IS_RELOC(attr)); + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_DI_1E; + bool needAdd = false; + instrDescJmp* id = emitNewInstrJmp(); + switch (ins) + { + case INS_adrp: + // This computes page address. + // page offset is needed using add. + needAdd = true; + break; + case INS_adr: break; + default: + unreached(); + } - case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + id->idOpSize(size); + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idReg1(ireg); + id->idSetIsDspReloc(); +#ifdef DEBUG + id->idDebugOnlyInfo()->idMemCookie = targetHandle; + id->idDebugOnlyInfo()->idFlags = gtFlags; +#endif - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeVLSElemsize(elemsize); // ss - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vt(id->idReg1()); // ttttt + dispIns(id); + appendToCurIG(id); - dst += emitOutput_Instr(dst, code); - break; + if (needAdd) + { + // add reg, reg, imm + ins = INS_add; + fmt = IF_DI_2A; + instrDesc* id = emitNewInstr(attr); + assert(id->idIsReloc()); - case IF_LS_3G: // LS_3G .Q.........mmmmm ...Sssnnnnnttttt Vt[] Rn Rm - elemsize = id->idOpSize(); - index = id->idSmallCns(); - code = emitInsCode(ins, fmt); + id->idIns(ins); + id->idInsFmt(fmt); + 
id->idInsOpt(INS_OPTS_NONE); + id->idOpSize(size); + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idReg1(ireg); + id->idReg2(ireg); - code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vt(id->idReg1()); // ttttt + dispIns(id); + appendToCurIG(id); + } +} - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_AR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs) +{ + NYI("emitIns_AR_R"); +} - case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh) - assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt())); - imm = emitGetInsSC(id); - assert(isValidUimm<12>(imm)); - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeShiftImm12(id->idInsOpt()); // sh - code |= ((code_t)imm << 10); // iiiiiiiiiiii - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ + NYI("emitIns_R_ARR"); +} - case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) - imm = emitGetInsSC(id); - assert(isValidImmHWVal(imm, id->idOpSize())); - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= ((code_t)imm << 5); // hwiiiii iiiiiiiiiii - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp) +{ + NYI("emitIns_R_ARR"); +} - case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... 
Rn imm(N,r,s) - imm = emitGetInsSC(id); - assert(isValidImmNRS(imm, id->idOpSize())); - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 10); // Nrrrrrrssssss - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_R_ARX( + instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp) +{ + NYI("emitIns_R_ARR"); +} - case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) - imm = emitGetInsSC(id); - assert(isValidImmNRS(imm, id->idOpSize())); - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 10); // Nrrrrrrssssss - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - break; +/***************************************************************************** + * + * Record that a jump instruction uses the short encoding + * + */ +void emitter::emitSetShortJump(instrDescJmp* id) +{ + if (id->idjKeepLong) + return; - case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 - case IF_LARGEADR: - assert(insOptsNone(id->idInsOpt())); - if (id->idIsReloc()) - { - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21 - : IMAGE_REL_ARM64_PAGEBASE_REL21); - } - else - { - // Local jmp/load case which does not need a relocation. 
- assert(id->idIsBound()); - dst = emitOutputLJ(ig, dst, id); - } - sz = sizeof(instrDescJmp); - break; + insFormat fmt = IF_NONE; + if (emitIsCondJump(id)) + { + switch (id->idIns()) + { + case INS_cbz: + case INS_cbnz: + fmt = IF_BI_1A; + break; + case INS_tbz: + case INS_tbnz: + fmt = IF_BI_1B; + break; + default: + fmt = IF_BI_0B; + break; + } + } + else if (emitIsLoadLabel(id)) + { + fmt = IF_DI_1E; + } + else if (emitIsLoadConstant(id)) + { + fmt = IF_LS_1A; + } + else + { + unreached(); + } - case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond - imm = emitGetInsSC(id); - assert(isValidImmCondFlagsImm5(imm)); - { - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - code |= ((code_t)cfi.imm5 << 16); // iiiii - code |= insEncodeFlags(cfi.flags); // nzcv - code |= insEncodeCond(cfi.cond); // cccc - dst += emitOutput_Instr(dst, code); - } - break; + id->idInsFmt(fmt); + id->idjShort = true; +} - case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) - assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt())); - imm = emitGetInsSC(id); - assert(isValidUimm<12>(imm)); - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeShiftImm12(id->idInsOpt()); // sh - code |= ((code_t)imm << 10); // iiiiiiiiiiii - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); +/***************************************************************************** + * + * Add a label instruction. + */ - if (id->idIsReloc()) - { - assert(sz == sizeof(instrDesc)); - assert(id->idAddr()->iiaAddr != nullptr); - emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? 
IMAGE_REL_AARCH64_TLSDESC_ADD_LO12 - : IMAGE_REL_ARM64_PAGEOFFSET_12A); - } - break; +void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + assert(dst->HasFlag(BBF_HAS_LABEL)); - case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // Reg2 also in mmmmm - code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss - dst += emitOutput_Instr(dst, code); - break; + insFormat fmt = IF_NONE; - case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) - imm = emitGetInsSC(id); - assert(isValidImmNRS(imm, id->idOpSize())); - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 10); // Nrrrrrrssssss - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); + switch (ins) + { + case INS_adr: + fmt = IF_LARGEADR; break; + default: + unreached(); + } - case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s) - if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr) - { - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); + instrDescJmp* id = emitNewInstrJmp(); - // Shift immediates are aliases of the SBFM/UBFM instructions - // that actually take 2 registers and 2 constants, - // Since we stored the shift immediate value - // we need to calculate the N,R and S values here. 
+ id->idIns(ins); + id->idInsFmt(fmt); + id->idjShort = false; + id->idAddr()->iiaBBlabel = dst; + id->idReg1(reg); + id->idOpSize(EA_PTRSIZE); - bitMaskImm bmi; - bmi.immNRS = 0; +#ifdef DEBUG + // Mark the catch return + if (emitComp->compCurBB->KindIs(BBJ_EHCATCHRET)) + { + id->idDebugOnlyInfo()->idCatchRet = true; + } +#endif // DEBUG - bmi.immN = (size == EA_8BYTE) ? 1 : 0; - bmi.immR = imm; - bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f; + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); - // immR and immS are now set correctly for INS_asr and INS_lsr - // but for INS_lsl we have to adjust the values for immR and immS - // - if (ins == INS_lsl) - { - bmi.immR = -imm & bmi.immS; - bmi.immS = bmi.immS - imm; - } +#ifdef DEBUG + if (emitComp->opts.compLongAddress) + id->idjKeepLong = 1; +#endif // DEBUG - // setup imm with the proper 13 bit value N:R:S - // - imm = bmi.immNRS; - } - else - { - // The other instructions have already have encoded N,R and S values - imm = emitGetInsSC(id); - } - assert(isValidImmNRS(imm, id->idOpSize())); + /* Record the jump's IG and offset within it */ - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 10); // Nrrrrrrssssss - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; - case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond - imm = emitGetInsSC(id); - assert(isValidImmCond(imm)); - { - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeInvertedCond(cfi.cond); // cccc - dst += emitOutput_Instr(dst, code); - } - break; + /* Append this jump to this IG's jump list */ - case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... 
Rn Rm - assert(insOptsNone(id->idInsOpt())); - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; - case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeShiftType(id->idInsOpt()); // sh - code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +#if EMITTER_STATS + emitTotalIGjmps++; +#endif - case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert((imm >= 0) && (imm <= 4)); // imm [0..4] - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeExtend(id->idInsOpt()); // ooo - code |= insEncodeExtendScale(imm); // sss - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + dispIns(id); + appendToCurIG(id); +} - case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond - imm = emitGetInsSC(id); - assert(isValidImmCond(imm)); - { - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - code |= insEncodeInvertedCond(cfi.cond); // cccc - dst += emitOutput_Instr(dst, code); - } - break; 
+/***************************************************************************** + * + * Add a data label instruction. + */ - case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg) +{ + NYI("emitIns_R_D"); +} - case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeShiftType(id->idInsOpt()); // sh - code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg) +{ + assert((ins == INS_cbz) || (ins == INS_cbnz)); - case IF_DR_2G: // DR_2G X............... .....xnnnnnddddd Rd Rn - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - if (ins == INS_rev) - { - if (size == EA_8BYTE) - { - code |= 0x00000400; // x - bit at location 10 - } - } - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + assert(dst != nullptr); + assert(dst->HasFlag(BBF_HAS_LABEL)); - case IF_DR_2H: // DR_2H X........X...... 
......nnnnnddddd Rd Rn - code = emitInsCode(ins, fmt); - code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + insFormat fmt = IF_LARGEJMP; - case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond - imm = emitGetInsSC(id); - assert(isValidImmCondFlags(imm)); - { - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rn(id->idReg1()); // nnnnn - code |= insEncodeReg_Rm(id->idReg2()); // mmmmm - code |= insEncodeFlags(cfi.flags); // nzcv - code |= insEncodeCond(cfi.cond); // cccc - dst += emitOutput_Instr(dst, code); - } - break; + instrDescJmp* id = emitNewInstrJmp(); - case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - if (id->idIsLclVar()) - { - code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm - } - else - { - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - } - dst += emitOutput_Instr(dst, code); - break; + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(reg); + id->idjShort = false; + id->idOpSize(EA_SIZE(attr)); - case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeShiftType(id->idInsOpt()); // sh - code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss - dst += emitOutput_Instr(dst, code); - break; + 
id->idAddr()->iiaBBlabel = dst; + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); - case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert((imm >= 0) && (imm <= 4)); // imm [0..4] - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeExtend(id->idInsOpt()); // ooo - code |= insEncodeExtendScale(imm); // sss - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + /* Record the jump's IG and offset within it */ - case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond - imm = emitGetInsSC(id); - assert(isValidImmCond(imm)); - { - condFlagsImm cfi; - cfi.immCFVal = (unsigned)imm; - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeCond(cfi.cond); // cccc - dst += emitOutput_Instr(dst, code); - } - break; + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; - case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - assert(isValidImmShift(imm, id->idOpSize())); - code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss - dst += emitOutput_Instr(dst, code); - break; + /* Append this jump to this IG's jump list */ - case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra - code = emitInsCode(ins, fmt); - code |= insEncodeDatasize(id->idOpSize()); // X - code |= 
insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - code |= insEncodeReg_Rm(id->idReg3()); // mmmmm - code |= insEncodeReg_Ra(id->idReg4()); // aaaaa - dst += emitOutput_Instr(dst, code); - break; + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; - case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(elemsize); // X - code |= ((code_t)imm << 13); // iiiii iii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - break; +#if EMITTER_STATS + emitTotalIGjmps++; +#endif + + dispIns(id); + appendToCurIG(id); +} - case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector) - imm = emitGetInsSC(id) & 0x0ff; - immShift = (emitGetInsSC(id) & 0x700) >> 8; - elemsize = optGetElemsize(id->idInsOpt()); - cmode = 0; - switch (elemsize) - { // cmode - case EA_1BYTE: - cmode = 0xE; // 1110 - break; - case EA_2BYTE: - cmode = 0x8; - cmode |= (immShift << 1); // 10x0 - break; - case EA_4BYTE: - if (immShift < 4) - { - cmode = 0x0; - cmode |= (immShift << 1); // 0xx0 - } - else // MSL - { - cmode = 0xC; - if (immShift & 2) - cmode |= 1; // 110x - } - break; - case EA_8BYTE: - cmode = 0xE; // 1110 - break; - default: - unreached(); - break; - } +void emitter::emitIns_J_R_I(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg, int imm) +{ + assert((ins == INS_tbz) || (ins == INS_tbnz)); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - if ((ins == INS_fmov) || (ins == INS_movi)) - { - if (elemsize == EA_8BYTE) - { - code |= 0x20000000; // X - } - } - if (ins != INS_fmov) - { - assert((cmode >= 0) && (cmode <= 0xF)); - code |= (cmode << 12); // cmod - } - code |= (((code_t)imm >> 5) << 16); // iii - code |= (((code_t)imm & 0x1f) << 5); // iiiii - code |= 
insEncodeReg_Vd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - break; + assert(dst != nullptr); + assert(dst->HasFlag(BBF_HAS_LABEL)); + assert((EA_SIZE(attr) == EA_4BYTE) || (EA_SIZE(attr) == EA_8BYTE)); + assert(imm < ((EA_SIZE(attr) == EA_4BYTE) ? 32 : 64)); - case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vn(id->idReg1()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + insFormat fmt = IF_LARGEJMP; - case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector) - case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - if ((ins == INS_fcvtl) || (ins == INS_fcvtl2) || (ins == INS_fcvtn) || (ins == INS_fcvtn2)) - { - // fcvtl{2} and fcvtn{2} encode the element size as - // esize = 16 << UInt(sz) - if (elemsize == EA_4BYTE) - { - code |= 0x00400000; // X - } - else - { - assert(elemsize == EA_2BYTE); - } - } - else - { - code |= insEncodeFloatElemsize(elemsize); // X - } - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + instrDescJmp* id = emitNewInstrJmp(); - case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general) - elemsize = id->idOpSize(); - index = emitGetInsSC(id); - datasize = (elemsize == EA_8BYTE) ? 
EA_16BYTE : EA_8BYTE; - if (ins == INS_smov) - { - datasize = EA_16BYTE; - } - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(datasize); // Q - code |= insEncodeVectorIndex(elemsize, index); // iiiii - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idIns(ins); + id->idInsFmt(fmt); + id->idReg1(reg); + id->idjShort = false; + id->idSmallCns(imm); + id->idOpSize(EA_SIZE(attr)); - case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) - if (ins == INS_dup) - { - datasize = id->idOpSize(); - elemsize = optGetElemsize(id->idInsOpt()); - index = 0; - } - else // INS_ins - { - datasize = EA_16BYTE; - elemsize = id->idOpSize(); - index = emitGetInsSC(id); - } - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(datasize); // Q - code |= insEncodeVectorIndex(elemsize, index); // iiiii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idAddr()->iiaBBlabel = dst; + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); - case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) - index = emitGetInsSC(id); - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeVectorIndex(elemsize, index); // iiiii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + /* Record the jump's IG and offset within it */ - case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) - index = emitGetInsSC(id); - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorIndex(elemsize, index); // iiiii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= 
insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; - case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) - elemsize = id->idOpSize(); - imm = emitGetInsSC(id); - index = (imm >> 4) & 0xf; - index2 = imm & 0xf; - code = emitInsCode(ins, fmt); - code |= insEncodeVectorIndex(elemsize, index); // iiiii - code |= insEncodeVectorIndex2(elemsize, index2); // jjjj - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + /* Append this jump to this IG's jump list */ - case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; - case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X - code |= insEncodeReg_Rd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +#if EMITTER_STATS + emitTotalIGjmps++; +#endif - case IF_DV_2I: // DV_2I X........X...... 
......nnnnnddddd Vd Rn (fmov - from general) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Rn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + dispIns(id); + appendToCurIG(id); +} - case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) - code = emitInsCode(ins, fmt); - code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) +{ + insFormat fmt = IF_NONE; - case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vn(id->idReg1()); // nnnnn - code |= insEncodeReg_Vm(id->idReg2()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + if (dst != nullptr) + { + assert(dst->HasFlag(BBF_HAS_LABEL)); + } + else + { + assert(instrCount != 0); + } - case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + /* Figure out the encoding format of the instruction */ - case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) - case IF_DV_2T: // DV_2T .Q......XX...... 
......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, - // umaxv, uminv) - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); + switch (ins) + { + case INS_bl_local: + case INS_b: + // Unconditional jump is a single form. + // Assume is long in case we cross hot/cold sections. + fmt = IF_BI_0A; break; - case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar) - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); + case INS_beq: + case INS_bne: + case INS_bhs: + case INS_blo: + case INS_bmi: + case INS_bpl: + case INS_bvs: + case INS_bvc: + case INS_bhi: + case INS_bls: + case INS_bge: + case INS_blt: + case INS_bgt: + case INS_ble: + // Assume conditional jump is long. + fmt = IF_LARGEJMP; break; - case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector) - imm = emitGetInsSC(id); - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); + default: + unreached(); break; + } - case IF_DV_2P: // DV_2P ............... 
......nnnnnddddd Vd Vn (aes*, sha1su1) - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + instrDescJmp* id = emitNewInstrJmp(); - case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Vd Vn (faddp, fmaxnmp, fmaxp, fminnmp, - // fminp - scalar) - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + id->idIns(ins); + id->idInsFmt(fmt); + id->idjShort = false; - case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) - elemsize = optGetElemsize(id->idInsOpt()); - code = emitInsCode(ins, fmt); - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; +#ifdef DEBUG + // Mark the finally call + if (ins == INS_bl_local && emitComp->compCurBB->KindIs(BBJ_CALLFINALLY)) + { + id->idDebugOnlyInfo()->idFinallyCall = true; + } +#endif // DEBUG - case IF_DV_2U: // DV_2U ................ 
......nnnnnddddd Sd Sn (sha1h) - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - dst += emitOutput_Instr(dst, code); - break; + if (dst != nullptr) + { + id->idAddr()->iiaBBlabel = dst; - case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - code = emitInsCode(ins, fmt); - elemsize = optGetElemsize(id->idInsOpt()); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + // Skip unconditional jump that has a single form. + // The target needs to be relocated. + id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst); - case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - elemsize = optGetElemsize(id->idInsOpt()); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +#ifdef DEBUG + if (emitComp->opts.compLongAddress) // Force long branches + { + id->idjKeepLong = true; + } +#endif // DEBUG + } + else + { + id->idAddr()->iiaSetInstrCount(instrCount); + id->idjKeepLong = false; + /* This jump must be short */ + emitSetShortJump(id); + id->idSetIsBound(); + } - case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - code = emitInsCode(ins, fmt); - elemsize = optGetElemsize(id->idInsOpt()); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= 
insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + /* Record the jump's IG and offset within it */ - case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - elemsize = optGetElemsize(id->idInsOpt()); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeFloatIndex(elemsize, imm); // L H - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + id->idjIG = emitCurIG; + id->idjOffs = emitCurIGsize; - case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + /* Append this jump to this IG's jump list */ - case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - code = emitInsCode(ins, fmt); - code |= insEncodeFloatElemsize(id->idOpSize()); // X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + id->idjNext = emitCurIGjmpList; + emitCurIGjmpList = id; - case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(isValidVectorIndex(EA_16BYTE, 
elemsize, imm)); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeFloatIndex(elemsize, imm); // L H - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +#if EMITTER_STATS + emitTotalIGjmps++; +#endif - case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - code = emitInsCode(ins, fmt); - elemsize = id->idOpSize(); - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; + dispIns(id); + appendToCurIG(id); +} - case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - code = emitInsCode(ins, fmt); - imm = emitGetInsSC(id); - elemsize = id->idOpSize(); - assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); - code |= insEncodeElemsize(elemsize); // XX - code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +/***************************************************************************** + * + * Add a call instruction (direct or indirect). + * argSize<0 means that the caller will pop the arguments + * + * The other arguments are interpreted depending on callType as shown: + * Unless otherwise specified, ireg,xreg,xmul,disp should have default values. + * + * EC_FUNC_TOKEN : addr is the method address + * EC_FUNC_ADDR : addr is the absolute address of the function + * + * If callType is one of these emitCallTypes, addr has to be NULL. + * EC_INDIR_R : "call ireg". + * + * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA. 
+ * + * Please consult the "debugger team notification" comment in genFnProlog(). + */ - case IF_DV_3F: // DV_3F ...........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - source dest regs overlap - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - dst += emitOutput_Instr(dst, code); - break; +void emitter::emitIns_Call(EmitCallType callType, + CORINFO_METHOD_HANDLE methHnd, + INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE + void* addr, + ssize_t argSize, + emitAttr retSize, + emitAttr secondRetSize, + VARSET_VALARG_TP ptrVars, + regMaskTP gcrefRegs, + regMaskTP byrefRegs, + const DebugInfo& di /* = DebugInfo() */, + regNumber ireg /* = REG_NA */, + regNumber xreg /* = REG_NA */, + unsigned xmul /* = 0 */, + ssize_t disp /* = 0 */, + bool isJump /* = false */) +{ + /* Sanity check the arguments depending on callType */ + + assert(callType < EC_COUNT); + assert((callType != EC_FUNC_TOKEN) || (addr != nullptr && ireg == REG_NA)); + assert(callType != EC_INDIR_R || (addr == nullptr && ireg < REG_COUNT)); - case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) - imm = emitGetInsSC(id); - code = emitInsCode(ins, fmt); - code |= insEncodeVectorsize(id->idOpSize()); // Q - code |= insEncodeReg_Vm(id->idReg3()); // mmmmm - code |= ((code_t)imm << 11); // iiii - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - dst += emitOutput_Instr(dst, code); - break; + // ARM never uses these + assert(xreg == REG_NA && xmul == 0 && disp == 0); - case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) - code = emitInsCode(ins, fmt); - elemsize = id->idOpSize(); - code |= insEncodeFloatElemsize(elemsize); // X - code |= insEncodeReg_Vd(id->idReg1()); // ddddd - code |= insEncodeReg_Vn(id->idReg2()); // nnnnn - code |= 
insEncodeReg_Vm(id->idReg3()); // mmmmm - code |= insEncodeReg_Va(id->idReg4()); // aaaaa - dst += emitOutput_Instr(dst, code); - break; + // Our stack level should be always greater than the bytes of arguments we push. Just + // a sanity test. + assert((unsigned)abs(argSize) <= codeGen->genStackLevel); - case IF_SN_0A: // SN_0A ................ ................ - { - bool skipIns = false; -#if FEATURE_LOOP_ALIGN - if (id->idIns() == INS_align) - { - // IG can be marked as not needing alignment after emitting align instruction. - // Alternatively, there are fewer align instructions needed than emitted. - // If that is the case, skip outputting alignment. - if (!ig->endsWithAlignInstr() || id->idIsEmptyAlign()) - { - skipIns = true; - } + // Trim out any callee-trashed registers from the live set. + regMaskTP savedSet = emitGetGCRegsSavedOrModified(methHnd); + gcrefRegs &= savedSet; + byrefRegs &= savedSet; #ifdef DEBUG - if (!ig->endsWithAlignInstr()) - { - // Validate if the state is correctly updated - assert(id->idIsEmptyAlign()); - } + if (EMIT_GC_VERBOSE) + { + printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars)); + dumpConvertedVarSet(emitComp, ptrVars); + printf(", gcrefRegs="); + printRegMaskInt(gcrefRegs); + emitDispRegSet(gcrefRegs); + printf(", byrefRegs="); + printRegMaskInt(byrefRegs); + emitDispRegSet(byrefRegs); + printf("\n"); + } #endif - sz = sizeof(instrDescAlign); - ins = INS_nop; - -#ifdef DEBUG - // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction, - // then add "bkpt" instruction. - instrDescAlign* alignInstr = (instrDescAlign*)id; - if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 50) && alignInstr->isPlacedAfterJmp && - !skipIns) - { - // There is no good way to squeeze in "bkpt" as well as display it - // in the disassembly because there is no corresponding instrDesc for - // it. As such, leave it as is, the "0xD43E0000" bytecode will be seen - // next to the nop instruction in disasm. 
- // e.g. D43E0000 align [4 bytes for IG07] - ins = INS_BREAKPOINT; - fmt = IF_SI_0A; - } -#endif - } -#endif // FEATURE_LOOP_ALIGN + /* Managed RetVal: emit sequence point for the call */ + if (emitComp->opts.compDbgInfo && di.GetLocation().IsValid()) + { + codeGen->genIPmappingAdd(IPmappingDscKind::Normal, di, false); + } - if (!skipIns) - { - code = emitInsCode(ins, fmt); - dst += emitOutput_Instr(dst, code); - } + /* + We need to allocate the appropriate instruction descriptor based + on whether this is a direct/indirect call, and whether we need to + record an updated set of live GC variables. + */ + instrDesc* id; - break; - } + assert(argSize % REGSIZE_BYTES == 0); + int argCnt = (int)(argSize / (int)REGSIZE_BYTES); - case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16 - imm = emitGetInsSC(id); - assert(isValidUimm<16>(imm)); - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii - dst += emitOutput_Instr(dst, code); - break; + if (callType == EC_INDIR_R) + { + /* Indirect call, virtual calls */ - case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier - imm = emitGetInsSC(id); - assert((imm >= 0) && (imm <= 15)); - code = emitInsCode(ins, fmt); - code |= ((code_t)imm << 8); // bbbb - dst += emitOutput_Instr(dst, code); - break; + id = emitNewInstrCallInd(argCnt, 0 /* disp */, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); + } + else + { + /* Helper/static/nonvirtual/function calls (direct or through handle), + and calls to an absolute addr. */ - case IF_SR_1A: // SR_1A ................ 
...........ttttt Rt (dc zva, mrs) - assert(insOptsNone(id->idInsOpt())); - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - dst += emitOutput_Instr(dst, code); - break; + assert(callType == EC_FUNC_TOKEN); - default: - dst = emitOutput_InstrSve(dst, id); - break; + id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize); } - // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. - // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a - // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as - // for stores, but we ignore those cases here.) - if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref. + /* Update the emitter's live GC ref sets */ + + VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars); + emitThisGCrefRegs = gcrefRegs; + emitThisByrefRegs = byrefRegs; + + id->idSetIsNoGC(emitNoGChelper(methHnd)); + + /* Set the instruction - special case jumping a function */ + instruction ins; + insFormat fmt = IF_NONE; + + /* Record the address: method, indirection, or funcptr */ + + if (callType == EC_INDIR_R) { - // We assume that "idReg1" is the primary destination register for all instructions - assert(!emitInsDestIsOp2(ins)); - if (id->idGCref() != GCT_NONE) + /* This is an indirect call (either a virtual call or func ptr call) */ + + if (isJump) { - emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + ins = INS_br_tail; // INS_br_tail Reg } else { - emitGCregDeadUpd(id->idReg1(), dst); + ins = INS_blr; // INS_blr Reg } + fmt = IF_BR_1B; - if (emitInsMayWriteMultipleRegs(id)) - { - // INS_ldp etc... 
- // "idReg2" is the secondary destination register - if (id->idGCrefReg2() != GCT_NONE) - { - emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); - } - else - { - emitGCregDeadUpd(id->idReg2(), dst); - } - } - } + id->idIns(ins); + id->idInsFmt(fmt); -SKIP_GC_UPDATE: - // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC - // ref or overwritten one. - if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id)) - { - int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); - unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); - bool FPbased; - int adr = emitComp->lvaFrameAddress(varNum, &FPbased); - if (id->idGCref() != GCT_NONE) + assert(xreg == REG_NA); + if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(retSize)) { - emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum)); + // For NativeAOT linux/arm64, we need to also record the relocation of methHnd. + // Since we do not have space to embed it in instrDesc, we store the register in + // reg1 and instead use the `iiaAdd` to store the method handle. Likewise, during + // emitOutputInstr, we retrieve the register from reg1 for this specific case. + id->idSetTlsGD(); + id->idReg1(ireg); + id->idAddr()->iiaAddr = (BYTE*)methHnd; } else { - // If the type of the local is a gc ref type, update the liveness. - var_types vt; - if (varNum >= 0) - { - // "Regular" (non-spill-temp) local. 
- vt = var_types(emitComp->lvaTable[varNum].lvType); - } - else - { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); - vt = tmpDsc->tdTempType(); - } - if (vt == TYP_REF || vt == TYP_BYREF) - { - emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum)); - } - } - if (emitInsWritesToLclVarStackLocPair(id)) - { - int varNum2 = varNum; - int adr2 = adr; - unsigned ofs2 = ofs; - unsigned ofs2Dist; - - if (id->idIsLclVarPair()) - { - bool FPbased2; - - emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id); - varNum2 = lclVarAddr2->lvaVarNum(); - ofs2 = lclVarAddr2->lvaOffset(); - - // If there are 2 GC vars in this instrDesc, get the 2nd variable - // that should be tracked. - adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2); - ofs2Dist = EA_SIZE_IN_BYTES(size); -#ifdef DEBUG - assert(FPbased == FPbased2); - if (FPbased) - { - assert(id->idReg3() == REG_FP); - } - else - { - assert(id->idReg3() == REG_SP); - } - assert(varNum2 != -1); -#endif // DEBUG - } - else - { - ofs2Dist = TARGET_POINTER_SIZE; - ofs2 += ofs2Dist; - } + id->idReg3(ireg); + } + } + else + { + /* This is a simple direct call: "call helper/method/addr" */ - ofs2 = AlignDown(ofs2, ofs2Dist); + assert(callType == EC_FUNC_TOKEN); - if (id->idGCrefReg2() != GCT_NONE) - { -#ifdef DEBUG - if (id->idGCref() != GCT_NONE) - { - // If 1st register was a gc-var, then make sure the offset - // are correctly set for the 2nd register that is holding - // another gc-var. - assert((adr + ofs + ofs2Dist) == (adr2 + ofs2)); - } -#endif - emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2)); - } - else - { - // If the type of the local is a gc ref type, update the liveness. - var_types vt; - if (varNum2 >= 0) - { - // "Regular" (non-spill-temp) local. 
- vt = var_types(emitComp->lvaTable[varNum2].lvType); - } - else - { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2); - vt = tmpDsc->tdTempType(); - } - if (vt == TYP_REF || vt == TYP_BYREF) - { - emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2)); - } - } + assert(addr != NULL); + + if (isJump) + { + ins = INS_b_tail; // INS_b_tail imm28 } - } + else + { + ins = INS_bl; // INS_bl imm28 + } + fmt = IF_BI_0C; -#ifdef DEBUG - /* Make sure we set the instruction descriptor size correctly */ + id->idIns(ins); + id->idInsFmt(fmt); - size_t expected = emitSizeOfInsDsc(id); - assert(sz == expected); + id->idAddr()->iiaAddr = (BYTE*)addr; - if (emitComp->opts.disAsm || emitComp->verbose) - { - emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + if (emitComp->opts.compReloc) + { + id->idSetIsDspReloc(); + } } - if (emitComp->compDebugBreak) +#ifdef DEBUG + if (EMIT_GC_VERBOSE) { - // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for - // emitting instruction a6, (i.e. IN00a6 in jitdump). - if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + if (id->idIsLargeCall()) { - assert(!"JitBreakEmitOutputInstr reached"); + printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum, + VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars)); } } +#endif - // Output any delta in GC info. 
- if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + if (m_debugInfoSize > 0) { - emitDispGCInfoDelta(); + INDEBUG(id->idDebugOnlyInfo()->idCallSig = sigInfo); + id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token } -#else - if (emitComp->opts.disAsm) + +#ifdef LATE_DISASM + if (addr != nullptr) { - size_t expected = emitSizeOfInsDsc(id); - assert(sz == expected); - emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd); } -#endif +#endif // LATE_DISASM - /* All instructions are expected to generate code */ + dispIns(id); + appendToCurIG(id); + emitLastMemBarrier = nullptr; // Cannot optimize away future memory barriers +} - assert(*dp != dst || id->idIsEmptyAlign()); +/***************************************************************************** + * + * Returns true if 'imm' is valid Cond encoding + */ - *dp = dst; +/*static*/ bool emitter::isValidImmCond(ssize_t imm) +{ + // range check the ssize_t value, to make sure it is a small unsigned value + // and that only the bits in the cfi.cond are set + if ((imm < 0) || (imm > 0xF)) + return false; - return sz; + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + + return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). } -/*****************************************************************************/ -/*****************************************************************************/ +/***************************************************************************** + * + * Returns true if 'imm' is valid Cond/Flags encoding + */ + +/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm) +{ + // range check the ssize_t value, to make sure it is a small unsigned value + // and that only the bits in the cfi.cond or cfi.flags are set + if ((imm < 0) || (imm > 0xFF)) + return false; + + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + + return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). 
+} /***************************************************************************** * - * Display a comma + * Returns true if 'imm' is valid Cond/Flags/Imm5 encoding */ -void emitter::emitDispComma() + +/*static*/ bool emitter::isValidImmCondFlagsImm5(ssize_t imm) { - printf(", "); + // range check the ssize_t value, to make sure it is a small unsigned value + // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set + if ((imm < 0) || (imm > 0x1FFF)) + return false; + + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + + return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV). } /***************************************************************************** * - * Display the instruction name + * Return an encoding for the specified 'V' register used in '9' thru '6' position with the times two encoding. + * This encoding requires that the register number be divisible by two. */ -void emitter::emitDispInst(instruction ins) + +/*static*/ emitter::code_t emitter::insEncodeReg_V_9_to_6_Times_Two(regNumber reg) { - const char* insstr = codeGen->genInsName(ins); - size_t len = strlen(insstr); + assert(isVectorRegister(reg)); + emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0; + assert(ureg % 2 == 0); + ureg /= 2u; + assert((ureg >= 0) && (ureg <= 31)); + return ureg << 6; +} - /* Display the instruction name */ +/***************************************************************************** + * + * Returns an encoding for the specified condition code. 
+ */ - printf("%s", insstr); +/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond) +{ + emitter::code_t uimm = (emitter::code_t)cond; + return uimm << 12; +} - // - // Add at least one space after the instruction name - // and add spaces until we have reach the normal size of 8 - do +/***************************************************************************** + * + * Returns an encoding for the condition code with the lowest bit inverted (marked by invert() in the + * architecture manual). + */ + +/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond) +{ + emitter::code_t uimm = (emitter::code_t)cond; + uimm ^= 1; // invert the lowest bit + return uimm << 12; +} + +/***************************************************************************** + * + * Returns an encoding for the specified flags. + */ + +/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags) +{ + emitter::code_t uimm = (emitter::code_t)flags; + return uimm; +} + +/***************************************************************************** + * + * Returns the encoding for the Shift Count bits to be used for Arm64 encodings + */ + +/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size) +{ + assert((imm & 0x003F) == imm); + assert(((imm & 0x0020) == 0) || (size == EA_8BYTE)); + + return (emitter::code_t)imm << 10; +} + +/***************************************************************************** + * + * Returns the encoding to select a 64-bit datasize for an Arm64 instruction + */ + +/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size) +{ + if (size == EA_8BYTE) { - printf(" "); - len++; - } while (len < 8); + return 0x80000000; // set the bit at location 31 + } + else + { + assert(size == EA_4BYTE); + return 0; + } +} + +/***************************************************************************** + * + * Returns the encoding to select the datasize for the general load/store Arm64 instructions + * + */ + 
+/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size) +{ + bool exclusive = ((code & 0x35000000) == 0); + bool atomic = ((code & 0x31200C00) == 0x30200000); + + if ((code & 0x00800000) && !exclusive && !atomic) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb) + { + if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ? + { + if (EA_SIZE(size) != EA_8BYTE) // Do we need to encode the 32-bit Rt size bit? + { + return 0x00400000; // set the bit at location 22 + } + } + } + else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode? + { + if (EA_SIZE(size) == EA_8BYTE) // Do we need to encode the 64-bit size bit? + { + return 0x40000000; // set the bit at location 30 + } + } + return 0; } /***************************************************************************** * - * Display an immediate value + * Returns the encoding to select the datasize for the vector load/store Arm64 instructions + * */ -void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */, bool isAddrOffset /* =false */) -{ - if (isAddrOffset) - { - alwaysHex = true; - } - else if (imm == 0) - { - // Non-offset values of zero are never displayed as hex. - alwaysHex = false; - } - if (strictArmAsm) - { - printf("#"); - } +/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size) +{ + code_t result = 0; - // Munge any pointers if we want diff-able disassembly. - // Since some may be emitted as partial words, print as diffable anything that has - // significant bits beyond the lowest 8-bits. 
- if (emitComp->opts.disDiffable) + // Check bit 29 + if ((code & 0x20000000) == 0) { - ssize_t top56bits = (imm >> 8); - if ((top56bits != 0) && (top56bits != -1)) - imm = 0xD1FFAB1E; - } + // LDR literal - if (!alwaysHex && (imm > -1000) && (imm < 1000)) - { - printf("%d", (int)imm); + if (size == EA_16BYTE) + { + // set the operation size in bit 31 + result = 0x80000000; + } + else if (size == EA_8BYTE) + { + // set the operation size in bit 30 + result = 0x40000000; + } + else + { + assert(size == EA_4BYTE); + // no bits are set + result = 0x00000000; + } } else { - if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL)) + // LDR non-literal + + if (size == EA_16BYTE) { - printf("-"); - imm = -imm; + // The operation size in bits 31 and 30 are zero + // Bit 23 specifies a 128-bit Load/Store + result = 0x00800000; } - - if ((imm & 0xFFFFFFFF00000000LL) != 0) + else if (size == EA_8BYTE) { - if (isAddrOffset) - { - printf("0x%llX", imm); - } - else - { - printf("0x%llx", imm); - } + // set the operation size in bits 31 and 30 + result = 0xC0000000; + } + else if (size == EA_4BYTE) + { + // set the operation size in bit 31 + result = 0x80000000; + } + else if (size == EA_2BYTE) + { + // set the operation size in bit 30 + result = 0x40000000; } else { - printf("0x%02X", (unsigned)imm); + assert(size == EA_1BYTE); + // The operation size in bits 31 and 30 are zero + result = 0x00000000; } } - if (addComma) - emitDispComma(); + // Or in bit 26 to indicate a Vector register is used as 'target' + result |= 0x04000000; + + return result; } /***************************************************************************** * - * Display an immediate value as an index operation + * Returns the encoding to select the datasize for the vector load/store Arm64 instructions + * */ -void emitter::emitDispElementIndex(const ssize_t imm, const bool addComma) + +/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size) { - 
printf("[%d]", imm); + code_t result = 0; - if (addComma) + if (size == EA_16BYTE) { - emitDispComma(); + // The operation size in bits 31 and 30 are zero + // Bit 23 specifies a 128-bit Load/Store + result = 0x80000000; } -} - -/***************************************************************************** - * - * Display a float zero constant - */ -void emitter::emitDispFloatZero() -{ - if (strictArmAsm) + else if (size == EA_8BYTE) { - printf("#"); + // set the operation size in bits 31 and 30 + result = 0x40000000; } - printf("0.0"); -} - -/***************************************************************************** - * - * Display an encoded float constant value - */ -void emitter::emitDispFloatImm(ssize_t imm8) -{ - assert((0 <= imm8) && (imm8 <= 0x0ff)); - if (strictArmAsm) + else if (size == EA_4BYTE) { - printf("#"); + // set the operation size in bit 31 + result = 0x00000000; } - floatImm8 fpImm; - fpImm.immFPIVal = (unsigned)imm8; - double result = emitDecodeFloatImm8(fpImm); + // Or in bit 26 to indicate a Vector register is used as 'target' + result |= 0x04000000; - printf("%.4f", result); + return result; } /***************************************************************************** * - * Display an encoded small float constant value + * Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction + * */ -void emitter::emitDispSmallFloatImm(ssize_t imm, instruction ins) + +/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size) { - if (strictArmAsm) + // is bit 30 equal to 0? + if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw { - printf("#"); + if (size == EA_8BYTE) // Do we need to set the sf and N bits? 
+ { + return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22 + } } - printf("%.4f", emitDecodeSmallFloatImm(imm, ins)); + return 0; // don't set any bits } /***************************************************************************** * - * Display an immediate with an optional left-shift. + * Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction */ -void emitter::emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount) + +/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size) { - if (!strictArmAsm && hasShift) + if (size == EA_16BYTE) { - imm <<= shiftAmount; + return 0x40000000; // set the bit at location 30 } - emitDispImm(imm, false); - if (strictArmAsm && hasShift) + else { - printf(", LSL #%u", shiftAmount); + assert(size == EA_8BYTE); + return 0; } } /***************************************************************************** * - * Display an ARM64 condition code for the conditional instructions + * Returns the encoding to select 'index' for an Arm64 vector elem instruction */ -void emitter::emitDispCond(insCond cond) +/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index) { - const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc", - "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid - unsigned imm = (unsigned)cond; - assert((0 <= imm) && (imm < ArrLen(armCond))); - printf(armCond[imm]); -} + code_t bits = (code_t)index; + if (elemsize == EA_1BYTE) + { + bits <<= 1; + bits |= 1; + } + else if (elemsize == EA_2BYTE) + { + bits <<= 2; + bits |= 2; + } + else if (elemsize == EA_4BYTE) + { + bits <<= 3; + bits |= 4; + } + else + { + assert(elemsize == EA_8BYTE); + bits <<= 4; + bits |= 8; + } + assert((bits >= 1) && (bits <= 0x1f)); -/***************************************************************************** - * - * Display an ARM64 flags for the conditional instructions - */ -void 
emitter::emitDispFlags(insCflags flags) -{ - const static char* armFlags[16] = {"0", "v", "c", "cv", "z", "zv", "zc", "zcv", - "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"}; - unsigned imm = (unsigned)flags; - assert((0 <= imm) && (imm < ArrLen(armFlags))); - printf(armFlags[imm]); + return (bits << 16); // bits at locations [20,19,18,17,16] } /***************************************************************************** * - * Display an ARM64 'barrier' for the memory barrier instructions + * Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction */ -void emitter::emitDispBarrier(insBarrier barrier) +/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2) { - const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh", - "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy"}; - unsigned imm = (unsigned)barrier; - assert((0 <= imm) && (imm < ArrLen(armBarriers))); - printf(armBarriers[imm]); + code_t bits = (code_t)index2; + if (elemsize == EA_1BYTE) + { + // bits are correct + } + else if (elemsize == EA_2BYTE) + { + bits <<= 1; + } + else if (elemsize == EA_4BYTE) + { + bits <<= 2; + } + else + { + assert(elemsize == EA_8BYTE); + bits <<= 3; + } + assert((bits >= 0) && (bits <= 0xf)); + + return (bits << 11); // bits at locations [14,13,12,11] } /***************************************************************************** * - * Prints the encoding for the Shift Type encoding + * Returns the encoding to select the 'index' for an Arm64 'mul' by element instruction */ - -void emitter::emitDispShiftOpts(insOpts opt) +/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index) { - if (opt == INS_OPTS_LSL) - printf(" LSL "); - else if (opt == INS_OPTS_LSR) - printf(" LSR "); - else if (opt == INS_OPTS_ASR) - printf(" ASR "); - else if (opt == INS_OPTS_ROR) - printf(" ROR "); - else if (opt == INS_OPTS_MSL) - printf(" MSL "); + code_t 
bits = 0; + + if (elemsize == EA_2BYTE) + { + assert((index >= 0) && (index <= 7)); + if (index & 0x4) + { + bits |= (1 << 11); // set bit 11 'H' + } + if (index & 0x2) + { + bits |= (1 << 21); // set bit 21 'L' + } + if (index & 0x1) + { + bits |= (1 << 20); // set bit 20 'M' + } + } + else if (elemsize == EA_4BYTE) + { + assert((index >= 0) && (index <= 3)); + if (index & 0x2) + { + bits |= (1 << 11); // set bit 11 'H' + } + if (index & 0x1) + { + bits |= (1 << 21); // set bit 21 'L' + } + } else - assert(!"Bad value"); + { + assert(!"Invalid 'elemsize' value"); + } + + return bits; } /***************************************************************************** * - * Prints the encoding for the Extend Type encoding + * Returns the encoding for a shift instruction, ready for insertion into an instruction. */ - -void emitter::emitDispExtendOpts(insOpts opt) +/*static*/ emitter::code_t emitter::insEncodeShiftImmediate(emitAttr size, bool isRightShift, ssize_t shiftAmount) { - if (opt == INS_OPTS_UXTB) - printf("UXTB"); - else if (opt == INS_OPTS_UXTH) - printf("UXTH"); - else if (opt == INS_OPTS_UXTW) - printf("UXTW"); - else if (opt == INS_OPTS_UXTX) - printf("UXTX"); - else if (opt == INS_OPTS_SXTB) - printf("SXTB"); - else if (opt == INS_OPTS_SXTH) - printf("SXTH"); - else if (opt == INS_OPTS_SXTW) - printf("SXTW"); - else if (opt == INS_OPTS_SXTX) - printf("SXTX"); + if (isRightShift) + { + // The right shift amount must be in the range 1 to the destination element width in bits. + assert((shiftAmount > 0) && (shiftAmount <= getBitWidth(size))); + return (code_t)(2 * getBitWidth(size) - shiftAmount); + } else - assert(!"Bad value"); + { + // The left shift amount must in the range 0 to the element width in bits minus 1. 
+ assert(shiftAmount < getBitWidth(size)); + return (code_t)(getBitWidth(size) + shiftAmount); + } } -//------------------------------------------------------------------------ -// emitDispReg: Display a general-purpose register name or SIMD and floating-point scalar register name -// -void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) -{ - emitAttr size = EA_SIZE(attr); - printf(emitRegName(reg, size)); - - if (addComma) - emitDispComma(); -} +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction + */ -//------------------------------------------------------------------------ -// emitDispVectorReg: Display a SIMD vector register name with an arrangement suffix -// -void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma) +/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size) { - assert(isVectorRegister(reg)); - printf(emitVectorRegName(reg)); - emitDispArrangement(opt); - - if (addComma) - emitDispComma(); + if (size == EA_8BYTE) + { + return 0x00C00000; // set the bit at location 23 and 22 + } + else if (size == EA_4BYTE) + { + return 0x00800000; // set the bit at location 23 + } + else if (size == EA_2BYTE) + { + return 0x00400000; // set the bit at location 22 + } + assert(size == EA_1BYTE); + return 0x00000000; } -//------------------------------------------------------------------------ -// emitDispVectorRegIndex: Display a SIMD vector register name with element index -// -void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma) -{ - assert(isVectorRegister(reg)); - printf(emitVectorRegName(reg)); - emitDispElemsize(elemsize); - printf("[%d]", (int)index); - - if (addComma) - emitDispComma(); -} +/***************************************************************************** + * + * Returns the encoding to select the 4/8 byte elemsize for an 
Arm64 float vector instruction + */ -//------------------------------------------------------------------------ -// emitDispVectorRegList: Display a SIMD vector register list -// -void emitter::emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) +/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size) { - assert(isVectorRegister(firstReg)); - - regNumber currReg = firstReg; - - printf("{"); - for (unsigned i = 0; i < listSize; i++) - { - const bool notLastRegister = (i != listSize - 1); - emitDispVectorReg(currReg, opt, notLastRegister); - currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); - } - printf("}"); - - if (addComma) + if (size == EA_8BYTE) { - emitDispComma(); + return 0x00400000; // set the bit at location 22 } + assert(size == EA_4BYTE); + return 0x00000000; } -//------------------------------------------------------------------------ -// emitDispVectorElemList: Display a SIMD vector element list -// -void emitter::emitDispVectorElemList( - regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma) +// Returns the encoding to select the index for an Arm64 float vector by element instruction +/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index) { - assert(isVectorRegister(firstReg)); - - regNumber currReg = firstReg; - - printf("{"); - for (unsigned i = 0; i < listSize; i++) + code_t result = 0x00000000; + if (elemsize == EA_8BYTE) { - printf(emitVectorRegName(currReg)); - emitDispElemsize(elemsize); - const bool notLastRegister = (i != listSize - 1); - if (notLastRegister) + assert((index >= 0) && (index <= 1)); + if (index == 1) { - emitDispComma(); + result |= 0x00000800; // 'H' - set the bit at location 11 } - currReg = (currReg == REG_V31) ? 
REG_V0 : REG_NEXT(currReg); - } - printf("}"); - printf("[%d]", index); - - if (addComma) - { - emitDispComma(); } -} - -//------------------------------------------------------------------------ -// emitDispArrangement: Display a SIMD vector arrangement suffix -// -void emitter::emitDispArrangement(insOpts opt) -{ - const char* str = "???"; - - switch (opt) + else { - case INS_OPTS_8B: - str = "8b"; - break; - case INS_OPTS_16B: - str = "16b"; - break; - case INS_OPTS_SCALABLE_B: - str = "b"; - break; - case INS_OPTS_4H: - str = "4h"; - break; - case INS_OPTS_8H: - str = "8h"; - break; - case INS_OPTS_SCALABLE_H: - str = "h"; - break; - case INS_OPTS_2S: - str = "2s"; - break; - case INS_OPTS_4S: - str = "4s"; - break; - case INS_OPTS_SCALABLE_S: - case INS_OPTS_SCALABLE_S_UXTW: - case INS_OPTS_SCALABLE_S_SXTW: - str = "s"; - break; - case INS_OPTS_1D: - str = "1d"; - break; - case INS_OPTS_2D: - str = "2d"; - break; - case INS_OPTS_SCALABLE_D: - case INS_OPTS_SCALABLE_D_UXTW: - case INS_OPTS_SCALABLE_D_SXTW: - str = "d"; - break; - case INS_OPTS_SCALABLE_Q: - str = "q"; - break; - - default: - assert(!"Invalid insOpt"); + assert(elemsize == EA_4BYTE); + assert((index >= 0) && (index <= 3)); + if (index & 2) + { + result |= 0x00000800; // 'H' - set the bit at location 11 + } + if (index & 1) + { + result |= 0x00200000; // 'L' - set the bit at location 21 + } } - printf("."); - printf(str); + return result; } -//------------------------------------------------------------------------ -// emitDispElemsize: Display a SIMD vector element suffix -// -void emitter::emitDispElemsize(emitAttr elemsize) +/***************************************************************************** + * + * Returns the encoding to select the vector elemsize for an Arm64 ld/st# vector instruction + */ + +/*static*/ emitter::code_t emitter::insEncodeVLSElemsize(emitAttr size) { - const char* str = "???"; + code_t result = 0x00000000; - switch (elemsize) + switch (size) { case EA_1BYTE: - str 
= ".b"; + { + result |= 0x0000; // clear bits 10 and 11 break; + } + case EA_2BYTE: - str = ".h"; + { + result |= 0x0400; // set bit at location 10, clear bit at location 11 break; + } + case EA_4BYTE: - str = ".s"; + { + result |= 0x0800; // clear bit at location 10, set bit at location 11 break; + } + case EA_8BYTE: - str = ".d"; + { + result |= 0x0C00; // set bits at location 10 and 11 break; + } default: - assert(!"invalid elemsize"); - break; - } - - printf(str); -} - -//------------------------------------------------------------------------ -// emitDispShiftedReg: Display a register with an optional shift operation -// -void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr) -{ - emitAttr size = EA_SIZE(attr); - assert((imm & 0x003F) == imm); - assert(((imm & 0x0020) == 0) || (size == EA_8BYTE)); - - printf(emitRegName(reg, size)); - - if (imm > 0) - { - if (strictArmAsm) { - emitDispComma(); + assert(!"Invalid element size"); + break; } - emitDispShiftOpts(opt); - emitDispImm(imm, false); } + + return result; } /***************************************************************************** * - * Display a register with an optional extend and scale operations + * Returns the encoding to select the index for an Arm64 ld/st# vector by element instruction */ -void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm) -{ - assert((imm >= 0) && (imm <= 4)); - assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL)); - // size is based on the extend option, not the instr size. - // Assume INS_OPTS_NONE and INS_OPTS_LSL are 64bit as they usually are. - emitAttr size = (insOptsNone(opt) || insOptsLSL(opt) || insOpts64BitExtend(opt)) ? 
EA_8BYTE : EA_4BYTE; +/*static*/ emitter::code_t emitter::insEncodeVLSIndex(emitAttr size, ssize_t index) +{ + code_t result = 0x00000000; - if (strictArmAsm) + switch (size) { - if (insOptsNone(opt) || (insOptsLSL(opt) && imm == 0)) + case EA_1BYTE: { - emitDispReg(reg, size, false); + // Q = ? - bit location 30 + // xx = 00 - bit location 14 and 15 + // S = ? - bit location 12 + // ss = ?0 - bit location 10 and 11 + + result |= (index & 0x8) << 27; + result |= (index & 0x4) << 10; + result |= (index & 0x3) << 10; + break; } - else - { - emitDispReg(reg, size, true); - if (insOptsLSL(opt)) - printf("LSL"); - else - emitDispExtendOpts(opt); + case EA_2BYTE: + { + // Q = ? - bit location 30 + // xx = 01 - bit location 14 and 15 + // S = ? - bit location 12 + // ss = ?? - bit location 10 and 11 - if (imm > 0) - { - printf(" "); - emitDispImm(imm, false); - } + result |= (index & 0x4) << 28; + result |= 0x4000; + result |= (index & 0x2) << 11; + result |= (index & 0x1) << 11; + break; } - } - else // !strictArmAsm - { - if (insOptsNone(opt)) + + case EA_4BYTE: { - emitDispReg(reg, size, false); + // Q = ? - bit location 30 + // xx = 10 - bit location 14 and 15 + // S = ? - bit location 12 + // ss = 00 - bit location 10 and 11 + + result |= (index & 0x2) << 29; + result |= 0x8000; + result |= (index & 0x1) << 12; + break; } - else + + case EA_8BYTE: { - if (opt != INS_OPTS_LSL) - { - emitDispExtendOpts(opt); - printf("("); - emitDispReg(reg, size, false); - printf(")"); - } + // Q = ? 
- bit location 30 + // xx = 10 - bit location 14 and 15 + // S = 0 - bit location 12 + // ss = 01 - bit location 10 and 11 + + result |= (index & 0x1) << 30; + result |= 0x8400; + break; } - if (imm > 0) + + default: { - printf("*"); - emitDispImm(ssize_t{1} << imm, false); + assert(!"Invalid element size"); + break; } } + + return result; } /***************************************************************************** * - * Display an addressing operand [reg + imm] + * Returns the encoding to select the fcvt operation for Arm64 instructions */ -void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm) +/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion) { - reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register - - if (strictArmAsm) + code_t result = 0; + switch (conversion) { - printf("["); + case INS_OPTS_S_TO_D: // Single to Double + assert(fmt == IF_DV_2J); + result = 0x00008000; // type=00, opc=01 + break; - emitDispReg(reg, EA_8BYTE, false); + case INS_OPTS_D_TO_S: // Double to Single + assert(fmt == IF_DV_2J); + result = 0x00400000; // type=01, opc=00 + break; - if (!insOptsPostIndex(opt) && (imm != 0)) - { - emitDispComma(); - emitDispImm(imm, false, true, true); - } - printf("]"); + case INS_OPTS_H_TO_S: // Half to Single + assert(fmt == IF_DV_2J); + result = 0x00C00000; // type=11, opc=00 + break; - if (insOptsPreIndex(opt)) - { - printf("!"); - } - else if (insOptsPostIndex(opt)) - { - emitDispComma(); - emitDispImm(imm, false, true, true); - } - } - else // !strictArmAsm - { - printf("["); + case INS_OPTS_H_TO_D: // Half to Double + assert(fmt == IF_DV_2J); + result = 0x00C08000; // type=11, opc=01 + break; - const char* operStr = "++"; - if (imm < 0) - { - operStr = "--"; - imm = -imm; - } + case INS_OPTS_S_TO_H: // Single to Half + assert(fmt == IF_DV_2J); + result = 0x00018000; // type=00, opc=11 + break; - if (insOptsPreIndex(opt)) - { - printf(operStr); - } + case INS_OPTS_D_TO_H: // 
Double to Half + assert(fmt == IF_DV_2J); + result = 0x00418000; // type=01, opc=11 + break; + + case INS_OPTS_S_TO_4BYTE: // Single to INT32 + assert(fmt == IF_DV_2H); + result = 0x00000000; // sf=0, type=00 + break; + + case INS_OPTS_D_TO_4BYTE: // Double to INT32 + assert(fmt == IF_DV_2H); + result = 0x00400000; // sf=0, type=01 + break; + + case INS_OPTS_S_TO_8BYTE: // Single to INT64 + assert(fmt == IF_DV_2H); + result = 0x80000000; // sf=1, type=00 + break; + + case INS_OPTS_D_TO_8BYTE: // Double to INT64 + assert(fmt == IF_DV_2H); + result = 0x80400000; // sf=1, type=01 + break; + + case INS_OPTS_4BYTE_TO_S: // INT32 to Single + assert(fmt == IF_DV_2I); + result = 0x00000000; // sf=0, type=00 + break; + + case INS_OPTS_4BYTE_TO_D: // INT32 to Double + assert(fmt == IF_DV_2I); + result = 0x00400000; // sf=0, type=01 + break; - emitDispReg(reg, EA_8BYTE, false); + case INS_OPTS_8BYTE_TO_S: // INT64 to Single + assert(fmt == IF_DV_2I); + result = 0x80000000; // sf=1, type=00 + break; - if (insOptsPostIndex(opt)) - { - printf(operStr); - } + case INS_OPTS_8BYTE_TO_D: // INT64 to Double + assert(fmt == IF_DV_2I); + result = 0x80400000; // sf=1, type=01 + break; - if (insOptsIndexed(opt)) - { - emitDispComma(); - } - else - { - printf("%c", operStr[1]); - } - emitDispImm(imm, false, true, true); - printf("]"); + default: + assert(!"Invalid 'conversion' value"); + break; } + return result; } /***************************************************************************** * - * Display an addressing operand [reg + extended reg] + * Returns the encoding to have the Rn register be updated Pre/Post indexed + * or not updated */ -void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size) -{ - reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register - - unsigned scale = 0; - if (isScaled) - { - scale = NaturalScale_helper(size); - } - printf("["); +/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt) 
+{ + assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt)); - if (strictArmAsm) + if (emitter::insOptsIndexed(opt)) { - emitDispReg(reg1, EA_8BYTE, true); - emitDispExtendReg(reg2, opt, scale); + if (emitter::insOptsPostIndex(opt)) + { + return 0x00000400; // set the bit at location 10 + } + else + { + assert(emitter::insOptsPreIndex(opt)); + return 0x00000C00; // set the bit at location 10 and 11 + } } - else // !strictArmAsm + else { - emitDispReg(reg1, EA_8BYTE, false); - printf("+"); - emitDispExtendReg(reg2, opt, scale); + assert(emitter::insOptsNone(opt)); + return 0; // bits 10 and 11 are zero } - - printf("]"); } /***************************************************************************** * - * Display (optionally) the instruction encoding in hex + * Returns the encoding for a ldp/stp instruction to have the Rn register + * be updated Pre/Post indexed or not updated */ -void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt) { - if (!emitComp->opts.disCodeBytes) + assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt)); + + if ((ins == INS_ldnp) || (ins == INS_stnp)) { - return; + assert(emitter::insOptsNone(opt)); + return 0; // bits 23 and 24 are zero } - - // We do not display the instruction hex if we want diff-able disassembly - if (!emitComp->opts.disDiffable) + else { - if (sz == 4) + if (emitter::insOptsIndexed(opt)) { - printf(" %08X ", (*((code_t*)code))); + if (emitter::insOptsPostIndex(opt)) + { + return 0x00800000; // set the bit at location 23 + } + else + { + assert(emitter::insOptsPreIndex(opt)); + return 0x01800000; // set the bit at location 24 and 23 + } } else { - printf(" "); + assert(emitter::insOptsNone(opt)); + return 0x01000000; // set the bit at location 24 } } } /***************************************************************************** * - * Handles printing of LARGEJMP pseudo-instruction. 
+ * Returns the encoding to apply a Shift Type on the Rm register */ -void emitter::emitDispLargeJmp( - instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt) { - // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very - // difficult-to-find bug. - - inlineInstrDesc idJmp; - instrDescJmp* pidJmp = idJmp.id(); - - const instruction ins = id->idIns(); - instruction reverseIns; - insFormat reverseFmt; - - // Reverse the conditional instruction. - switch (ins) + if (emitter::insOptsNone(opt)) { - case INS_cbz: - reverseIns = INS_cbnz; - reverseFmt = IF_BI_1A; - break; - case INS_cbnz: - reverseIns = INS_cbz; - reverseFmt = IF_BI_1A; - break; - case INS_tbz: - reverseIns = INS_tbnz; - reverseFmt = IF_BI_1B; - break; - case INS_tbnz: - reverseIns = INS_tbz; - reverseFmt = IF_BI_1B; - break; - default: - reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); - reverseFmt = IF_BI_0B; + // None implies the we encode LSL (with a zero immediate) + opt = INS_OPTS_LSL; } + assert(emitter::insOptsAnyShift(opt)); - pidJmp->idIns(reverseIns); - pidJmp->idInsFmt(reverseFmt); - pidJmp->idOpSize(id->idOpSize()); - pidJmp->idAddr()->iiaSetInstrCount(1); - pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field. - - const size_t bcondSizeOrZero = (pCode == NULL) ? 0 : 4; // Branch is 4 bytes. - emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, pCode, bcondSizeOrZero, - NULL /* force display of pc-relative branch */); - - pCode += bcondSizeOrZero; - offset += 4; - - // Next, display the unconditional branch. + emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL; + assert(option <= 3); - // Reset the local instrDesc. 
- memset(pidJmp, 0, sizeof(instrDescJmp)); + return option << 22; // bits 23, 22 +} - pidJmp->idIns(INS_b); - pidJmp->idInsFmt(IF_LARGEJMP); +/***************************************************************************** + * + * Returns the encoding to apply a 12 bit left shift to the immediate + */ - if (id->idIsBound()) +/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt) +{ + if (emitter::insOptsLSL12(opt)) { - pidJmp->idSetIsBound(); - pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel; + return 0x00400000; // set the bit at location 22 } - else + return 0; +} + +/***************************************************************************** + * + * Returns the encoding to have the Rm register use an extend operation + */ + +/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt) +{ + if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL)) { - pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel; + // None or LSL implies the we encode UXTX + opt = INS_OPTS_UXTX; } + assert(emitter::insOptsAnyExtend(opt)); - pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field. + emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB; + assert(option <= 7); - const size_t brSizeOrZero = (pCode == NULL) ? 0 : 4; // Unconditional branch is 4 bytes. - emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, pCode, brSizeOrZero, ig); + return option << 13; // bits 15,14,13 } /***************************************************************************** * - * Wrapper for emitter::emitDispInsHelp() that handles special large jump - * pseudo-instruction. 
+ * Returns the encoding to scale the Rm register by {0,1,2,3,4} + * when using an extend operation */ -void emitter::emitDispIns( - instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm) { - // Special case: IF_LARGEJMP + assert((imm >= 0) && (imm <= 4)); - if ((id->idInsFmt() == IF_LARGEJMP) && id->idIsBound()) - { - // This is a pseudo-instruction format representing a large conditional branch. See the comment - // in emitter::emitOutputLJ() for the full description. - // - // For this pseudo-instruction, we will actually generate: - // - // b L_not // 4 bytes. Note that we reverse the condition. - // b L_target // 4 bytes. - // L_not: - // - // These instructions don't exist in the actual instruction stream, so we need to fake them - // up to display them. - emitDispLargeJmp(id, isNew, doffs, asmfm, offset, pCode, sz, ig); - } - else - { - emitDispInsHelp(id, isNew, doffs, asmfm, offset, pCode, sz, ig); - } + return (emitter::code_t)imm << 10; // bits 12,11,10 } -//-------------------------------------------------------------------- -// emitDispInsHelp: Dump the given instruction to jitstdout. -// -// Arguments: -// id - The instruction -// isNew - Whether the instruction is newly generated (before encoding). -// doffs - If true, always display the passed-in offset. -// asmfm - Whether the instruction should be displayed in assembly format. -// If false some additional information may be printed for the instruction. -// offset - The offset of the instruction. Only displayed if doffs is true or if -// !isNew && !asmfm. -// code - Pointer to the actual code, used for displaying the address and encoded bytes -// if turned on. -// sz - The size of the instruction, used to display the encoded bytes. -// ig - The instruction group containing the instruction. 
-// -void emitter::emitDispInsHelp( - instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +/***************************************************************************** + * + * Returns the encoding to have the Rm register be auto scaled by the ld/st size + */ + +/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled) { -#ifdef DEBUG - if (EMITVERBOSE) + if (isScaled) { - unsigned idNum = - id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints - - printf("IN%04x: ", idNum); - } -#endif - - if (pCode == NULL) + return 0x00001000; // set the bit at location 12 + } + else { - sz = 0; + return 0; } +} - if (!isNew && !asmfm && sz) +/***************************************************************************** + * + * Returns the encoding for the immediate value as 9-bits at bit locations '21-16' for high and '12-10' for low. + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm9h9l_21_to_16_and_12_to_10(ssize_t imm) +{ + assert(isValidSimm<9>(imm)); + + if (imm < 0) { - doffs = true; + imm = (imm & 0x1FF); } - /* Display the instruction address */ + code_t h = (code_t)(imm & 0x1F8) << 13; // encode high 6-bits at locations '21-16' + code_t l = (code_t)((imm & ~0x1F8) & 0x7) << 10; // encode low 3-bits at locations '12-10' - emitDispInsAddr(pCode); + return (h | l); +} - /* Display the instruction offset */ +/***************************************************************************** + * + * Returns the encoding for the immediate value as 3-bits at bit locations '23-22' for high and '12' for low. 
+ */ - emitDispInsOffs(offset, doffs); +/*static*/ emitter::code_t emitter::insEncodeUimm3h3l_23_to_22_and_12(ssize_t imm) +{ + assert(isValidUimm<3>(imm)); - BYTE* pCodeRW = nullptr; - if (pCode != nullptr) + code_t h = (code_t)(imm & 0x6) << 21; // encode high 2-bits at locations '23-22' + code_t l = (code_t)(imm & 0x1) << 12; // encode low 1-bit at locations '12' + + return (h | l); +} + +/***************************************************************************** + * + * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm8_12_to_5(ssize_t imm) +{ + assert(isValidSimm<8>(imm) || isValidUimm<8>(imm)); + return (code_t)((imm & 0xFF) << 5); +} + +BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id) +{ + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); + regNumber dstReg = id->idReg1(); + if (id->idjShort) { - /* Display the instruction hex code */ - assert(((pCode >= emitCodeBlock) && (pCode < emitCodeBlock + emitTotalHotCodeSize)) || - ((pCode >= emitColdCodeBlock) && (pCode < emitColdCodeBlock + emitTotalColdCodeSize))); + // adr x, [rel addr] -- compute address: current addr(ip) + rel addr. 
+ assert(ins == INS_adr); + assert(fmt == IF_DI_1E); + ssize_t distVal = (ssize_t)(dstAddr - srcAddr); + dst = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg); + } + else + { + // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr + assert(fmt == IF_LARGEADR); + ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr); + dst = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg); - pCodeRW = pCode + writeableOffset; + // add x, x, page offs -- compute address = page addr + page offs + ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits + assert(isValidUimm<12>(imm12)); + code_t code = + emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh) + code |= insEncodeDatasize(EA_8BYTE); // X + code |= ((code_t)imm12 << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rd(dstReg); // ddddd + code |= insEncodeReg_Rn(dstReg); // nnnnn + dst += emitOutput_Instr(dst, code); } + return dst; +} - emitDispInsHex(id, pCodeRW, sz); +/***************************************************************************** + * + * Output a local jump or other instruction with a pc-relative immediate. + * Note that this may be invoked to overwrite an existing jump instruction at 'dst' + * to handle forward branch patching. + */ - printf(" "); +BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) +{ + instrDescJmp* id = (instrDescJmp*)i; - /* Get the instruction and format */ + unsigned srcOffs; + unsigned dstOffs; + BYTE* srcAddr; + BYTE* dstAddr; + ssize_t distVal; + // Set default ins/fmt from id. 
instruction ins = id->idIns(); insFormat fmt = id->idInsFmt(); - emitDispInst(ins); + bool loadLabel = false; + bool isJump = false; + bool loadConstant = false; - /* If this instruction has just been added, check its size */ + switch (ins) + { + default: + isJump = true; + break; - assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id); + case INS_tbz: + case INS_tbnz: + case INS_cbz: + case INS_cbnz: + isJump = true; + break; - /* Figure out the operand size */ - emitAttr size = id->idOpSize(); - emitAttr attr = size; - if (id->idGCref() == GCT_GCREF) - attr = EA_GCREF; - else if (id->idGCref() == GCT_BYREF) - attr = EA_BYREF; + case INS_ldr: + case INS_ldrsw: + loadConstant = true; + break; - switch (fmt) + case INS_adr: + case INS_adrp: + loadLabel = true; + break; + } + + /* Figure out the distance to the target */ + + srcOffs = emitCurCodeOffs(dst); + srcAddr = emitOffsetToPtr(srcOffs); + + if (id->idAddr()->iiaIsJitDataOffset()) { - ssize_t imm; - int doffs; - bitMaskImm bmi; - halfwordImm hwi; - condFlagsImm cfi; - unsigned scale; - unsigned immShift; - bool hasShift; - const char* methodName; - emitAttr elemsize; - emitAttr datasize; - emitAttr srcsize; - emitAttr dstsize; - ssize_t index; - ssize_t index2; - unsigned registerListSize; - const char* targetName; + assert(loadConstant || loadLabel); + int doff = id->idAddr()->iiaGetJitDataOffset(); + assert(doff >= 0); + ssize_t imm = emitGetInsSC(id); + assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0 - case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 - case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 - case IF_LARGEJMP: + unsigned dataOffs = (unsigned)(doff + imm); + assert(dataOffs < emitDataSize()); + dstAddr = emitDataOffsetToPtr(dataOffs); + + regNumber dstReg = id->idReg1(); + regNumber addrReg = dstReg; // an integer register to compute long address. 
+ emitAttr opSize = id->idOpSize(); + + if (loadConstant) { - if (fmt == IF_LARGEJMP) + if (id->idjShort) { - printf("(LARGEJMP)"); + // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr. + assert(ins == INS_ldr); + assert(fmt == IF_LS_1A); + distVal = (ssize_t)(dstAddr - srcAddr); + dst = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize); } - if (id->idAddr()->iiaHasInstrCount()) + else { - int instrCount = id->idAddr()->iiaGetInstrCount(); + // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr + assert(fmt == IF_LARGELDC); + ssize_t relPageAddr = computeRelPageAddr((size_t)dstAddr, (size_t)srcAddr); + if (isVectorRegister(dstReg)) + { + // Update addrReg with the reserved integer register + // since we cannot use dstReg (vector) to load constant directly from memory. - if (ig == nullptr) + // If loading a 16-byte value, we will need to load directly into dstReg. + // Thus, encode addrReg for the ld1 instruction. + if (opSize == EA_16BYTE) + { + addrReg = encodingSPtoZR(id->idReg2()); + } + else + { + addrReg = id->idReg2(); + } + + assert(isGeneralRegister(addrReg)); + } + + ins = INS_adrp; + fmt = IF_DI_1E; + dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg); + + ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits + assert(isValidUimm<12>(imm12)); + + // Special case: emit add + ld1 instructions for loading 16-byte data into vector register. + if (isVectorRegister(dstReg) && (opSize == EA_16BYTE)) { - printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount); + const emitAttr elemSize = EA_1BYTE; + const insOpts opt = optMakeArrangement(opSize, elemSize); + + assert(isGeneralRegisterOrSP(addrReg)); + assert(isValidVectorElemsize(elemSize)); + assert(isValidArrangement(opSize, opt)); + + // Calculate page addr + page offs, then emit ld1 instruction. 
+ dst = emitOutputVectorConstant(dst, imm12, dstReg, addrReg, opSize, elemSize); } else { - unsigned insNum = emitFindInsNum(ig, id); - UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1); - UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount); - ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs)); - printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount); + // ldr x, [x, 0] -- load constant from address into integer register. + ins = INS_ldr; + fmt = IF_LS_2B; + dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize); + + // fmov v, d -- copy constant in integer register to vector register. + // This is needed only for vector constant. + if (addrReg != dstReg) + { + // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn + // (scalar, from general) + assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg)); + ins = INS_fmov; + fmt = IF_DV_2I; + code_t code = emitInsCode(ins, fmt); + + code |= insEncodeReg_Vd(dstReg); // ddddd + code |= insEncodeReg_Rn(addrReg); // nnnnn + if (id->idOpSize() == EA_8BYTE) + { + code |= 0x80400000; // X ... 
X + } + dst += emitOutput_Instr(dst, code); + } } } - else if (id->idIsBound()) - { - emitPrintLabel(id->idAddr()->iiaIGlabel); - } - else - { - printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); - } } - break; + else + { + assert(loadLabel); + dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id); + } - case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 - methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); - printf("%s", methodName); - break; + return dst; + } - case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00 - case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 + assert(loadLabel || isJump); + + if (id->idAddr()->iiaHasInstrCount()) + { + assert(ig != NULL); + int instrCount = id->idAddr()->iiaGetInstrCount(); + unsigned insNum = emitFindInsNum(ig, id); + if (instrCount < 0) { - assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg1(), size, true); + // Backward branches using instruction count must be within the same instruction group. + assert(insNum + 1 >= (unsigned)(-instrCount)); + } - if (fmt == IF_BI_1B) - { - emitDispImm(emitGetInsSC(id), true); - } + dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount)); + dstAddr = emitOffsetToPtr(dstOffs); + } + else + { + dstOffs = id->idAddr()->iiaIGlabel->igOffs; + dstAddr = emitOffsetToPtr(dstOffs); + } - if (id->idAddr()->iiaHasInstrCount()) - { - int instrCount = id->idAddr()->iiaGetInstrCount(); + distVal = (ssize_t)(dstAddr - srcAddr); - if (ig == nullptr) - { - printf("pc%s%d instructions", (instrCount >= 0) ? 
"+" : "", instrCount); - } - else - { - unsigned insNum = emitFindInsNum(ig, id); - UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1); - UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount); - ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs)); - printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount); - } - } - else if (id->idIsBound()) - { - emitPrintLabel(id->idAddr()->iiaIGlabel); - } - else - { - printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); - } + if (dstOffs <= srcOffs) + { +#if DEBUG_EMIT + /* This is a backward jump - distance is known at this point */ + + if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + size_t blkOffs = id->idjIG->igOffs; + + if (INTERESTING_JUMP_NUM == 0) + printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum); + printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj); + printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj); + printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj); } - break; +#endif + } + else + { + /* This is a forward jump - distance will be an upper limit */ - case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn - assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg1(), size, false); - break; + emitFwdJumps = true; - case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn - // The size of a branch target is always EA_PTRSIZE - assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg3(), EA_PTRSIZE, false); - break; + /* The target offset will be closer by at least 'emitOffsAdj', but only if this + jump doesn't cross the hot-cold boundary. 
*/ - case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) - case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 - case IF_LARGELDC: - case IF_LARGEADR: - assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg1(), size, true); - imm = emitGetInsSC(id); - targetName = nullptr; + if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) + { + dstOffs -= emitOffsAdj; + distVal -= emitOffsAdj; + } - /* Is this actually a reference to a data section? */ - if (fmt == IF_LARGEADR) - { - printf("(LARGEADR)"); - } - else if (fmt == IF_LARGELDC) - { - printf("(LARGELDC)"); - } + /* Record the location of the jump for later patching */ - printf("["); - if (id->idAddr()->iiaIsJitDataOffset()) - { - doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd); - /* Display a data section reference */ + id->idjOffs = dstOffs; - if (doffs & 1) - printf("@CNS%02u", doffs - 1); - else - printf("@RWD%02u", doffs); + /* Are we overflowing the id->idjOffs bitfield? */ + if (id->idjOffs != dstOffs) + IMPL_LIMITATION("Method is too large"); - if (imm != 0) - printf("%+Id", imm); - } - else - { - assert(imm == 0); - if (id->idIsReloc()) - { - printf("HIGH RELOC "); - emitDispImm((ssize_t)id->idAddr()->iiaAddr, false); - size_t targetHandle = id->idDebugOnlyInfo()->idMemCookie; +#if DEBUG_EMIT + if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0) + { + size_t blkOffs = id->idjIG->igOffs; + + if (INTERESTING_JUMP_NUM == 0) + printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum); + printf("[4] Jump block is at %08X\n", blkOffs); + printf("[4] Jump is at %08X\n", srcOffs); + printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs); + } +#endif + } #ifdef DEBUG - if (targetHandle == THT_InitializeArrayIntrinsics) - { - targetName = "InitializeArrayIntrinsics"; - } - else if (targetHandle == THT_GSCookieCheck) - { - targetName = "GlobalSecurityCookieCheck"; - } - else if 
(targetHandle == THT_SetGSCookie) - { - targetName = "SetGlobalSecurityCookie"; - } + if (0 && emitComp->verbose) + { + size_t sz = 4; + int distValSize = id->idjShort ? 4 : 8; + printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = 0x%08X\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd", + dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal); + } #endif - } - else if (id->idIsBound()) - { - emitPrintLabel(id->idAddr()->iiaIGlabel); - } - else + + /* For forward jumps, record the address of the distance value */ + id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL; + + assert(insOptsNone(id->idInsOpt())); + + if (isJump) + { + if (id->idjShort) + { + // Short conditional/unconditional jump + assert(!id->idjKeepLong); + assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); + assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B) || (fmt == IF_BI_1A) || (fmt == IF_BI_1B)); + dst = emitOutputShortBranch(dst, ins, fmt, distVal, id); + } + else + { + // Long conditional/unconditional jump + + if (fmt == IF_LARGEJMP) + { + // This is a pseudo-instruction format representing a large conditional branch, to allow + // us to get a greater branch target range than we can get by using a straightforward conditional + // branch. It is encoded as a short conditional branch that branches around a long unconditional + // branch. + // + // Conceptually, we have: + // + // b L_target + // + // The code we emit is: + // + // b L_not // 4 bytes. Note that we reverse the condition. + // b L_target // 4 bytes + // L_not: + // + // Note that we don't actually insert any blocks: we simply encode "b L_not" as a branch with + // the correct offset. Note also that this works for both integer and floating-point conditions, because + // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example, + // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered). 
+ + instruction reverseIns; + insFormat reverseFmt; + + switch (ins) { - printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); + case INS_cbz: + reverseIns = INS_cbnz; + reverseFmt = IF_BI_1A; + break; + case INS_cbnz: + reverseIns = INS_cbz; + reverseFmt = IF_BI_1A; + break; + case INS_tbz: + reverseIns = INS_tbnz; + reverseFmt = IF_BI_1B; + break; + case INS_tbnz: + reverseIns = INS_tbz; + reverseFmt = IF_BI_1B; + break; + default: + reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); + reverseFmt = IF_BI_0B; } - } - printf("]"); - if (targetName != nullptr) - { - printf(" // [%s]", targetName); - } - else - { - emitDispCommentForHandle(id->idDebugOnlyInfo()->idMemCookie, 0, id->idDebugOnlyInfo()->idFlags); - } - break; - case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn - assert(insOptsNone(id->idInsOpt())); - assert((emitGetInsSC(id) == 0) || id->idIsTlsGD()); - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0); - break; + dst = emitOutputShortBranch(dst, + reverseIns, // reverse the conditional instruction + reverseFmt, 8, /* 8 bytes from start of this large conditional + pseudo-instruction to L_not. */ + id); - case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) - assert(insOptsNone(id->idInsOpt())); - imm = emitGetInsSC(id); - scale = NaturalScale_helper(emitInsLoadStoreSize(id)); - imm <<= scale; // The immediate is scaled by the size of the ld/st - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm); - break; + // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that. 
+ ins = INS_b; + fmt = IF_BI_0A; - case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc - assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); - imm = emitGetInsSC(id); - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm); - break; + // The distVal was computed based on the beginning of the pseudo-instruction, + // So subtract the size of the conditional branch so that it is relative to the + // unconditional branch. + distVal -= 4; + } - case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn - case IF_LS_2E: // LS_2E .Q.............. ....ssnnnnnttttt Vt Rn - registerListSize = insGetRegisterListSize(id->idIns()); - emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true); + assert(fmt == IF_BI_0A); + assert((distVal & 1) == 0); + code_t code = emitInsCode(ins, fmt); + const bool doRecordRelocation = emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs); - if (fmt == IF_LS_2D) + if (doRecordRelocation) { - // Load/Store multiple structures base register - // Load single structure and replicate base register - emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0); + // dst isn't an actual final target location, just some intermediate + // location. Thus we cannot make any guarantees about distVal (not + // even the direction/sign). Instead we don't encode any offset and + // rely on the relocation to do all the work } else { - // Load/Store multiple structures post-indexed by an immediate - // Load single structure and replicate post-indexed by an immediate - emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, id->idSmallCns()); + // Branch offset encodings are scaled by 4. 
+ noway_assert((distVal & 3) == 0); + distVal >>= 2; + noway_assert(isValidSimm<26>(distVal)); + + // Insert offset into unconditional branch instruction + distVal &= 0x3FFFFFFLL; + code |= distVal; } - break; - case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn - case IF_LS_2G: // LS_2G .Q.............. xx.Sssnnnnnttttt Vt[] Rn - registerListSize = insGetRegisterListSize(id->idIns()); - elemsize = id->idOpSize(); - emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true); + const unsigned instrSize = emitOutput_Instr(dst, code); - if (fmt == IF_LS_2F) - { - // Load/Store single structure base register - emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0); - } - else + if (doRecordRelocation) { - // Load/Store single structure post-indexed by an immediate - emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, (registerListSize * elemsize)); + assert(id->idjKeepLong); + if (emitComp->info.compMatchedVM) + { + void* target = emitOffsetToPtr(dstOffs); + emitRecordRelocation((void*)dst, target, IMAGE_REL_ARM64_BRANCH26); + } } - break; - case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} - assert(insOptsLSExtend(id->idInsOpt())); - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - if (id->idIsLclVar()) - { - emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size); - } - else - { - emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size); - } - break; + dst += instrSize; + } + } + else if (loadLabel) + { + dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id); + } - case IF_LS_3B: // LS_3B X............... 
.aaaaannnnnddddd Rt Ra Rn - assert(insOptsNone(id->idInsOpt())); - assert(emitGetInsSC(id) == 0); - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); - break; + return dst; +} - case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnddddd Rt Ra Rn imm(im7,sh) - assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); - imm = emitGetInsSC(id); - scale = NaturalScale_helper(emitInsLoadStoreSize(id)); - imm <<= scale; - emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); - emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm); - break; +/***************************************************************************** +* +* Output a short branch instruction. +*/ +BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id) +{ + code_t code = emitInsCode(ins, fmt); - case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn - assert(insOptsNone(id->idInsOpt())); - emitDispReg(id->idReg1(), EA_4BYTE, true); - emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); - emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); - break; + ssize_t loBits = (distVal & 3); + noway_assert(loBits == 0); + distVal >>= 2; // branch offset encodings are scaled by 4. 
- case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics - assert(insOptsNone(id->idInsOpt())); - assert((EA_SIZE(size) == 4) || (EA_SIZE(size) == 8)); - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); - break; + if (fmt == IF_BI_0A) + { + // INS_b or INS_bl_local + noway_assert(isValidSimm<26>(distVal)); + distVal &= 0x3FFFFFFLL; + code |= distVal; + } + else if (fmt == IF_BI_0B) // BI_0B 01010100iiiiiiii iiiiiiiiiiiXXXXX simm19:00 + { + // INS_beq, INS_bne, etc... + noway_assert(isValidSimm<19>(distVal)); + distVal &= 0x7FFFFLL; + code |= distVal << 5; + } + else if (fmt == IF_BI_1A) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00 + { + // INS_cbz or INS_cbnz + assert(id != nullptr); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt - case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm - case IF_LS_3G: // LS_3G .Q.........mmmmm ...Sssnnnnnttttt Vt[] Rn Rm - registerListSize = insGetRegisterListSize(id->idIns()); + noway_assert(isValidSimm<19>(distVal)); + distVal &= 0x7FFFFLL; // 19 bits + code |= distVal << 5; + } + else if (fmt == IF_BI_1B) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 + { + // INS_tbz or INS_tbnz + assert(id != nullptr); + ssize_t imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); - if (fmt == IF_LS_3F) - { - // Load/Store multiple structures post-indexed by a register - // Load single structure and replicate post-indexed by a register - emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true); - } - else - { - // Load/Store single structure post-indexed by a register - elemsize = id->idOpSize(); - emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true); - } + if (imm & 0x20) // test bit 32-63 ? 
+ { + code |= 0x80000000; // B + } + code |= ((imm & 0x1F) << 19); // bbbbb + code |= insEncodeReg_Rt(id->idReg1()); // ttttt - printf("["); - emitDispReg(encodingZRtoSP(id->idReg2()), EA_8BYTE, false); - printf("], "); - emitDispReg(id->idReg3(), EA_8BYTE, false); - break; + noway_assert(isValidSimm<14>(distVal)); + distVal &= 0x3FFFLL; // 14 bits + code |= distVal << 5; + } + else + { + assert(!"Unknown fmt for emitOutputShortBranch"); + } - case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh) - emitDispReg(id->idReg1(), size, true); - emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); - emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); - break; + dst += emitOutput_Instr(dst, code); + + return dst; +} + +/***************************************************************************** +* +* Output a short address instruction. +*/ +BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg) +{ + ssize_t loBits = (distVal & 3); + distVal >>= 2; + + code_t code = emitInsCode(ins, fmt); + if (fmt == IF_DI_1E) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 + { + // INS_adr or INS_adrp + code |= insEncodeReg_Rd(reg); // ddddd + + noway_assert(isValidSimm<19>(distVal)); + distVal &= 0x7FFFFLL; // 19 bits + code |= distVal << 5; + code |= loBits << 29; // 2 bits + } + else + { + assert(!"Unknown fmt for emitOutputShortAddress"); + } - case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) - emitDispReg(id->idReg1(), size, true); - hwi.immHWVal = (unsigned)emitGetInsSC(id); - if (ins == INS_mov) - { - emitDispImm(emitDecodeHalfwordImm(hwi, size), false); - } - else // movz, movn, movk - { - emitDispImm(hwi.immVal, false); - if (hwi.immHW != 0) - { - emitDispShiftOpts(INS_OPTS_LSL); - emitDispImm(hwi.immHW * 16, false); - } - } - emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, 
id->idDebugOnlyInfo()->idFlags); - break; + dst += emitOutput_Instr(dst, code); - case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s) - emitDispReg(id->idReg1(), size, true); - bmi.immNRS = (unsigned)emitGetInsSC(id); - emitDispImm(emitDecodeBitMaskImm(bmi, size), false); - emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); - break; + return dst; +} - case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - bmi.immNRS = (unsigned)emitGetInsSC(id); - emitDispImm(emitDecodeBitMaskImm(bmi, size), false); - emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); - break; +/***************************************************************************** +* +* Output a short constant instruction. +*/ +BYTE* emitter::emitOutputShortConstant( + BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize) +{ + code_t code = emitInsCode(ins, fmt); - case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) - if ((ins == INS_add) || (ins == INS_sub)) - { - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - emitDispReg(encodingZRtoSP(id->idReg2()), size, true); - } - else - { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - } - if (id->idIsReloc()) - { - assert(ins == INS_add); - printf("[LOW RELOC "); - emitDispImm((ssize_t)id->idAddr()->iiaAddr, false); - printf("]"); - } - else - { - emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); - } - break; + if (fmt == IF_LS_1A) + { + // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt simm21 + // INS_ldr or INS_ldrsw (PC-Relative) - case IF_DI_2B: // DI_2B X........X.nnnnn ssssssnnnnnddddd Rd Rn imm(0-63) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispImm(emitGetInsSC(id), false); - break; + ssize_t loBits = (imm & 
3); + noway_assert(loBits == 0); + ssize_t distVal = imm >> 2; // load offset encodings are scaled by 4. - case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) - if (ins == INS_ands) - { - emitDispReg(id->idReg1(), size, true); - } - else + noway_assert(isValidSimm<19>(distVal)); + + // Is the target a vector register? + if (isVectorRegister(reg)) + { + code |= insEncodeDatasizeVLS(code, opSize); // XX V + code |= insEncodeReg_Vt(reg); // ttttt + } + else + { + assert(isGeneralRegister(reg)); + // insEncodeDatasizeLS is not quite right for this case. + // So just specialize it. + if ((ins == INS_ldr) && (opSize == EA_8BYTE)) { - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + // set the operation size in bit 30 + code |= 0x40000000; } - emitDispReg(id->idReg2(), size, true); - bmi.immNRS = (unsigned)emitGetInsSC(id); - emitDispImm(emitDecodeBitMaskImm(bmi, size), false); - break; - case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, ims (N,r,s) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); + code |= insEncodeReg_Rt(reg); // ttttt + } - imm = emitGetInsSC(id); - bmi.immNRS = (unsigned)imm; + distVal &= 0x7FFFFLL; // 19 bits + code |= distVal << 5; + } + else if (fmt == IF_LS_2B) + { + // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3}) + // INS_ldr or INS_ldrsw (PC-Relative) + noway_assert(isValidUimm<12>(imm)); + assert(isGeneralRegister(reg)); - switch (ins) + if (opSize == EA_8BYTE) + { + // insEncodeDatasizeLS is not quite right for this case. + // So just specialize it. 
+ if (ins == INS_ldr) { - case INS_bfm: - case INS_sbfm: - case INS_ubfm: - emitDispImm(bmi.immR, true); - emitDispImm(bmi.immS, false); - break; - - case INS_bfi: - case INS_sbfiz: - case INS_ubfiz: - emitDispImm(getBitWidth(size) - bmi.immR, true); - emitDispImm(bmi.immS + 1, false); - break; + // set the operation size in bit 30 + code |= 0x40000000; + } + // Low 3 bits should be 0 -- 8 byte JIT data should be aligned on 8 byte. + assert((imm & 7) == 0); + imm >>= 3; + } + else + { + assert(opSize == EA_4BYTE); + // Low 2 bits should be 0 -- 4 byte aligned data. + assert((imm & 3) == 0); + imm >>= 2; + } - case INS_bfxil: - case INS_sbfx: - case INS_ubfx: - emitDispImm(bmi.immR, true); - emitDispImm(bmi.immS - bmi.immR + 1, false); - break; + code |= insEncodeReg_Rt(reg); // ttttt + code |= insEncodeReg_Rn(reg); // nnnnn + code |= imm << 10; + } + else + { + assert(!"Unknown fmt for emitOutputShortConstant"); + } - case INS_asr: - case INS_lsr: - case INS_lsl: - emitDispImm(imm, false); - break; + dst += emitOutput_Instr(dst, code); - default: - assert(!"Unexpected instruction in IF_DI_2D"); - } + return dst; +} - break; +/***************************************************************************** + * + * Output instructions to load a constant into a vector register. + */ +BYTE* emitter::emitOutputVectorConstant( + BYTE* dst, ssize_t imm, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize) +{ + // add addrReg, addrReg, page offs -- compute address = page addr + page offs. 
+ code_t code = emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh) + code |= insEncodeDatasize(EA_8BYTE); // X - use EA_8BYTE, as we are calculating 64-bit address + code |= ((code_t)imm << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rd(addrReg); // ddddd + code |= insEncodeReg_Rn(addrReg); // nnnnn + dst += emitOutput_Instr(dst, code); - case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond - emitDispReg(id->idReg1(), size, true); - cfi.immCFVal = (unsigned)emitGetInsSC(id); - emitDispImm(cfi.imm5, true); - emitDispFlags(cfi.flags); - emitDispComma(); - emitDispCond(cfi.cond); - break; + // ld1 dstReg, addrReg -- load constant at address in addrReg into dstReg. + code = emitInsCode(INS_ld1, IF_LS_2D); // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn + code |= insEncodeVectorsize(opSize); // Q + code |= insEncodeVLSElemsize(elemSize); // ss + code |= insEncodeReg_Rn(addrReg); // nnnnn + code |= insEncodeReg_Vt(dstReg); // ttttt + dst += emitOutput_Instr(dst, code); - case IF_DR_1D: // DR_1D X............... cccc.......mmmmm Rd cond - emitDispReg(id->idReg1(), size, true); - cfi.immCFVal = (unsigned)emitGetInsSC(id); - emitDispCond(cfi.cond); - break; + return dst; +} - case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, false); - break; +/***************************************************************************** + * + * Output a call instruction. + */ - case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... 
Rn Rm {LSL,LSR,ASR,ROR} imm(0-63) - emitDispReg(id->idReg1(), size, true); - emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size); - break; +unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code) +{ + const unsigned char callInstrSize = sizeof(code_t); // 4 bytes + regMaskTP gcrefRegs; + regMaskTP byrefRegs; - case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4) - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - imm = emitGetInsSC(id); - emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm); - break; + VARSET_TP GCvars(VarSetOps::UninitVal()); - case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - cfi.immCFVal = (unsigned)emitGetInsSC(id); - emitDispCond(cfi.cond); - break; + // Is this a "fat" call descriptor? + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + gcrefRegs = idCall->idcGcrefRegs; + byrefRegs = idCall->idcByrefRegs; + VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars); + } + else + { + assert(!id->idIsLargeDsp()); + assert(!id->idIsLargeCns()); - case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm - case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, false); - break; + gcrefRegs = emitDecodeCallGCregs(id); + byrefRegs = 0; + VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp)); + } - case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) - emitDispReg(id->idReg1(), size, true); - emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size); - break; + /* We update the GC info before the call as the variables cannot be + used by the call. Killing variables before the call helps with + boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029. 
+ If we ever track aliased variables (which could be used by the + call), we would have to keep them alive past the call. */ - case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - emitDispReg(encodingZRtoSP(id->idReg2()), size, false); - break; + emitUpdateLiveGCvars(GCvars, dst); - case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn - if ((ins == INS_uxtb) || (ins == INS_uxth)) - { - // There is no 64-bit variant of uxtb and uxth - // However, we allow idOpSize() to have EA_8BYTE value for these instruction - emitDispReg(id->idReg1(), EA_4BYTE, true); - emitDispReg(id->idReg2(), EA_4BYTE, false); - } - else - { - emitDispReg(id->idReg1(), size, true); - // sxtb, sxth and sxtb always operate on 32-bit source register - emitDispReg(id->idReg2(), EA_4BYTE, false); - } - break; +#ifdef DEBUG + // Output any delta in GC variable info, corresponding to the before-call GC var updates done above. + if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCVarDelta(); + } +#endif // DEBUG - case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - cfi.immCFVal = (unsigned)emitGetInsSC(id); - emitDispFlags(cfi.flags); - emitDispComma(); - emitDispCond(cfi.cond); - break; + // Now output the call instruction and update the 'dst' pointer + // + unsigned outputInstrSize = emitOutput_Instr(dst, code); + dst += outputInstrSize; - case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm - if ((ins == INS_add) || (ins == INS_sub)) - { - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - emitDispReg(encodingZRtoSP(id->idReg2()), size, true); - } - else if ((ins == INS_smulh) || (ins == INS_umulh)) - { - size = EA_8BYTE; - // smulh Xd, Xn, Xm - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - } - else if ((ins == INS_smull) || (ins == INS_umull) 
|| (ins == INS_smnegl) || (ins == INS_umnegl)) - { - // smull Xd, Wn, Wm - emitDispReg(id->idReg1(), EA_8BYTE, true); - size = EA_4BYTE; - emitDispReg(id->idReg2(), size, true); - } - else - { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - } + // All call instructions are 4-byte in size on ARM64 + // + assert(outputInstrSize == callInstrSize); - if (id->idIsLclVar()) - { - emitDispReg(codeGen->rsGetRsvdReg(), size, false); - } - else - { - emitDispReg(id->idReg3(), size, false); - } + // If the method returns a GC ref, mark INTRET (R0) appropriately. + if (id->idGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET; + } + else if (id->idGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET; + } - break; + // If is a multi-register return method is called, mark INTRET_1 (X1) appropriately + if (id->idIsLargeCall()) + { + instrDescCGCA* idCall = (instrDescCGCA*)id; + if (idCall->idSecondGCref() == GCT_GCREF) + { + gcrefRegs |= RBM_INTRET_1; + } + else if (idCall->idSecondGCref() == GCT_BYREF) + { + byrefRegs |= RBM_INTRET_1; + } + } - case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size); - break; + // If the GC register set has changed, report the new set. + if (gcrefRegs != emitThisGCrefRegs) + { + emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst); + } + // If the Byref register set has changed, report the new set. 
+ if (byrefRegs != emitThisByrefRegs) + { + emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst); + } - case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) - emitDispReg(encodingZRtoSP(id->idReg1()), size, true); - emitDispReg(encodingZRtoSP(id->idReg2()), size, true); - imm = emitGetInsSC(id); - emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm); - break; + // Some helper calls may be marked as not requiring GC info to be recorded. + if ((!id->idIsNoGC())) + { + // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args. + // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism + // to record the call for GC info purposes. (It might be best to use an alternate call, + // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.) + emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0); - case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, true); - cfi.immCFVal = (unsigned)emitGetInsSC(id); - emitDispCond(cfi.cond); - break; + // Do we need to record a call location for GC purposes? 
+ // + if (!emitFullGCinfo) + { + emitRecordGCcall(dst, callInstrSize); + } + } + return callInstrSize; +} - case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, true); - emitDispImm(emitGetInsSC(id), false); - break; +/***************************************************************************** + * + * Emit a 32-bit Arm64 instruction + */ - case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra - if ((ins == INS_smaddl) || (ins == INS_smsubl) || (ins == INS_umaddl) || (ins == INS_umsubl)) - { - // smaddl Xd, Wn, Wm, Xa - emitDispReg(id->idReg1(), EA_8BYTE, true); - emitDispReg(id->idReg2(), EA_4BYTE, true); - emitDispReg(id->idReg3(), EA_4BYTE, true); - emitDispReg(id->idReg4(), EA_8BYTE, false); - } - else - { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, true); - emitDispReg(id->idReg4(), size, false); - } - break; +unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code) +{ + assert(sizeof(code_t) == 4); + BYTE* dstRW = dst + writeableOffset; + *((code_t*)dstRW) = code; - case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar) - elemsize = id->idOpSize(); - emitDispReg(id->idReg1(), elemsize, true); - emitDispFloatImm(emitGetInsSC(id)); - break; + return sizeof(code_t); +} - case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector) - imm = emitGetInsSC(id) & 0x0ff; - immShift = (emitGetInsSC(id) & 0x700) >> 8; - hasShift = (immShift != 0); - elemsize = optGetElemsize(id->idInsOpt()); - if (id->idInsOpt() == INS_OPTS_1D) - { - assert(elemsize == size); - emitDispReg(id->idReg1(), size, true); - } - else - { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - } - if (ins == INS_fmov) - { - emitDispFloatImm(imm); - assert(hasShift == false); - } - else - { - if (elemsize 
== EA_8BYTE) - { - assert(ins == INS_movi); - ssize_t imm64 = 0; - const ssize_t mask8 = 0xFF; - for (unsigned b = 0; b < 8; b++) - { - if (imm & (ssize_t{1} << b)) - { - imm64 |= (mask8 << (b * 8)); - } - } - emitDispImm(imm64, hasShift, true); - } - else - { - emitDispImm(imm, hasShift, true); - } - if (hasShift) - { - insOpts opt = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL; - unsigned shift = (immShift & 0x3) * 8; - emitDispShiftOpts(opt); - emitDispImm(shift, false); - } - } +/***************************************************************************** +* + * Append the machine code corresponding to the given instruction descriptor + * to the code block at '*dp'; the base of the code block is 'bp', and 'ig' + * is the instruction group that contains the instruction. Updates '*dp' to + * point past the generated code, and returns the size of the instruction + * descriptor in bytes. + */ + +size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) +{ + BYTE* dst = *dp; + BYTE* odst = dst; + code_t code = 0; + size_t sz = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why? + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); + emitAttr size = id->idOpSize(); + +#ifdef DEBUG +#if DUMP_GC_TABLES + bool dspOffs = emitComp->opts.dspGCtbls; +#else + bool dspOffs = !emitComp->opts.disDiffable; +#endif +#endif // DEBUG + + assert(REG_NA == (int)REG_NA); + + /* What instruction format have we got? */ + + switch (fmt) + { + ssize_t imm; + ssize_t index; + ssize_t index2; + unsigned cmode; + unsigned immShift; + emitAttr elemsize; + emitAttr datasize; + + case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 + case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 + case IF_LARGEJMP: + assert(id->idGCref() == GCT_NONE); + assert(id->idIsBound()); + dst = emitOutputLJ(ig, dst, id); + sz = sizeof(instrDescJmp); break; - case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... 
Vn #0.0 (fcmp - with zero) - elemsize = id->idOpSize(); - emitDispReg(id->idReg1(), elemsize, true); - emitDispFloatZero(); + case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 + code = emitInsCode(ins, fmt); + sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc); + dst += emitOutputCall(ig, dst, id, code); + // Always call RecordRelocation so that we wire in a JumpStub when we don't reach + emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26); break; - case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector) - if (emitInsIsVectorLong(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); - } - else if (emitInsIsVectorNarrow(ins)) - { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false); - } - else - { - assert(!emitInsIsVectorWide(ins)); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); - } - if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt) - { - emitDispComma(); - emitDispFloatZero(); - } + case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00 + assert(insOptsNone(id->idInsOpt())); + assert(id->idIsBound()); + + dst = emitOutputLJ(ig, dst, id); + sz = sizeof(instrDescJmp); break; - case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 + assert(insOptsNone(id->idInsOpt())); + assert(id->idIsBound()); + + dst = emitOutputLJ(ig, dst, id); + sz = sizeof(instrDescJmp); break; - case IF_DV_2M: // DV_2M .Q......XX...... 
......nnnnnddddd Vd Vn (abs, neg - vector) - if (emitInsIsVectorNarrow(ins)) - { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false); - } - else - { - assert(!emitInsIsVectorLong(ins) && !emitInsIsVectorWide(ins)); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); - } - if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt) - { - emitDispComma(); - emitDispImm(0, false); - } + case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn + assert(insOptsNone(id->idInsOpt())); + assert((ins == INS_ret) || (ins == INS_br)); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + + dst += emitOutput_Instr(dst, code); break; - case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar) - elemsize = id->idOpSize(); - if (emitInsIsVectorLong(ins)) - { - emitDispReg(id->idReg1(), widenDatasize(elemsize), true); - emitDispReg(id->idReg2(), elemsize, true); - } - else if (emitInsIsVectorNarrow(ins)) + case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn + assert(insOptsNone(id->idInsOpt())); + assert((ins == INS_br_tail) || (ins == INS_blr)); + code = emitInsCode(ins, fmt); + + if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD()) { - emitDispReg(id->idReg1(), elemsize, true); - emitDispReg(id->idReg2(), widenDatasize(elemsize), true); + emitRecordRelocation(odst, (CORINFO_METHOD_HANDLE)id->idAddr()->iiaAddr, + IMAGE_REL_AARCH64_TLSDESC_CALL); + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn } else { - assert(!emitInsIsVectorWide(ins)); - emitDispReg(id->idReg1(), elemsize, true); - emitDispReg(id->idReg2(), elemsize, true); + code |= insEncodeReg_Rn(id->idReg3()); // nnnnn } - imm = emitGetInsSC(id); - emitDispImm(imm, false); + dst += emitOutputCall(ig, dst, id, code); + sz = id->idIsLargeCall() ? 
sizeof(instrDescCGCA) : sizeof(instrDesc); break; - case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector) - if ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2)) - { - assert((emitInsIsVectorLong(ins))); - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); - } - else - { - if (emitInsIsVectorLong(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } - else if (emitInsIsVectorNarrow(ins)) - { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); - } - else - { - assert(!emitInsIsVectorWide(ins)); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_LARGELDC: + assert(insOptsNone(id->idInsOpt())); + assert(id->idIsBound()); - imm = emitGetInsSC(id); - emitDispImm(imm, false); - } + dst = emitOutputLJ(ig, dst, id); + sz = sizeof(instrDescJmp); break; - case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general) - srcsize = id->idOpSize(); - index = emitGetInsSC(id); - if (ins == INS_smov) - { - dstsize = EA_8BYTE; - } - else // INS_umov or INS_mov + case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + // Is the target a vector register? + if (isVectorRegister(id->idReg1())) { - dstsize = (srcsize == EA_8BYTE) ? 
EA_8BYTE : EA_4BYTE; + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt } - emitDispReg(id->idReg1(), dstsize, true); - emitDispVectorRegIndex(id->idReg2(), srcsize, index, false); - break; - - case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) - if (ins == INS_dup) + else { - datasize = id->idOpSize(); - assert(isValidVectorDatasize(datasize)); - assert(isValidArrangement(datasize, id->idInsOpt())); - elemsize = optGetElemsize(id->idInsOpt()); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt } - else // INS_ins + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); + if (id->idIsTlsGD()) { - elemsize = id->idOpSize(); - index = emitGetInsSC(id); - assert(isValidVectorElemsize(elemsize)); - emitDispVectorRegIndex(id->idReg1(), elemsize, index, true); + emitRecordRelocation(odst, (void*)emitGetInsSC(id), IMAGE_REL_AARCH64_TLSDESC_LD64_LO12); } - emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? 
EA_8BYTE : EA_4BYTE, false); - break; - - case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) - datasize = id->idOpSize(); - assert(isValidVectorDatasize(datasize)); - assert(isValidArrangement(datasize, id->idInsOpt())); - elemsize = optGetElemsize(id->idInsOpt()); - index = emitGetInsSC(id); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorRegIndex(id->idReg2(), elemsize, index, false); - break; - - case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) - elemsize = id->idOpSize(); - index = emitGetInsSC(id); - emitDispReg(id->idReg1(), elemsize, true); - emitDispVectorRegIndex(id->idReg2(), elemsize, index, false); - break; - - case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) - imm = emitGetInsSC(id); - index = (imm >> 4) & 0xf; - index2 = imm & 0xf; - elemsize = id->idOpSize(); - emitDispVectorRegIndex(id->idReg1(), elemsize, index, true); - emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false); break; - case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register) - case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) - case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) - size = id->idOpSize(); - if ((ins == INS_fcmeq) || (ins == INS_fcmge) || (ins == INS_fcmgt) || (ins == INS_fcmle) || - (ins == INS_fcmlt)) - { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispFloatZero(); - } - else if (emitInsIsVectorNarrow(ins)) + case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) + assert(insOptsNone(id->idInsOpt())); + imm = emitGetInsSC(id); + assert(isValidUimm<12>(imm)); + code = emitInsCode(ins, fmt); + // Is the target a vector register? 
+ if (isVectorRegister(id->idReg1())) { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), widenDatasize(size), false); + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt } else { - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, false); + code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt } - if (fmt == IF_DV_2L && - (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)) + code |= ((code_t)imm << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); + break; + + case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc + assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); + imm = emitGetInsSC(id); + assert((imm >= -256) && (imm <= 255)); // signed 9 bits + imm &= 0x1ff; // force into unsigned 9 bit representation + code = emitInsCode(ins, fmt); + // Is the target a vector register? + if (isVectorRegister(id->idReg1())) { - emitDispComma(); - emitDispImm(0, false); + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt + } + else + { + code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt } + code |= insEncodeIndexedOpt(id->idInsOpt()); // PP + code |= ((code_t)imm << 12); // iiiiiiiii + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general) - case IF_DV_2I: // DV_2I X........X...... 
......nnnnnddddd Vd Rn (fmov, Xcvtf - from general) - case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) - dstsize = optGetDstsize(id->idInsOpt()); - srcsize = optGetSrcsize(id->idInsOpt()); + case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn + case IF_LS_2E: // LS_2E .Q.............. ....ssnnnnnttttt Vt Rn + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); - emitDispReg(id->idReg1(), dstsize, true); - emitDispReg(id->idReg2(), srcsize, false); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeVLSElemsize(elemsize); // ss + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vt(id->idReg1()); // ttttt + + dst += emitOutput_Instr(dst, code); break; - case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, - // fminp - scalar) - case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) - case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) - case IF_DV_2T: // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, - // umaxv, uminv) - if ((ins == INS_sadalp) || (ins == INS_saddlp) || (ins == INS_uadalp) || (ins == INS_uaddlp)) + case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn + case IF_LS_2G: // LS_2G .Q.............. xx.Sssnnnnnttttt Vt[] Rn + elemsize = id->idOpSize(); + index = id->idSmallCns(); + code = emitInsCode(ins, fmt); + + code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vt(id->idReg1()); // ttttt + + dst += emitOutput_Instr(dst, code); + break; + + case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} + assert(insOptsLSExtend(id->idInsOpt())); + code = emitInsCode(ins, fmt); + // Is the target a vector register? 
+ if (isVectorRegister(id->idReg1())) { - emitDispVectorReg(id->idReg1(), optWidenDstArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt } else { - if ((ins == INS_saddlv) || (ins == INS_uaddlv)) - { - elemsize = optGetElemsize(optWidenDstArrangement(id->idInsOpt())); - } - else - { - elemsize = optGetElemsize(id->idInsOpt()); - } - emitDispReg(id->idReg1(), elemsize, true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt } - break; - - case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) - if ((ins == INS_sdot) || (ins == INS_udot)) + code |= insEncodeExtend(id->idInsOpt()); // ooo + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + if (id->idIsLclVar()) { - // sdot/udot Vd.2s, Vn.8b, Vm.8b - // sdot/udot Vd.4s, Vn.16b, Vm.16b - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - size = id->idOpSize(); - emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true); - emitDispVectorReg(id->idReg3(), (size == EA_8BYTE) ? 
INS_OPTS_8B : INS_OPTS_16B, false); + code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm } - else if (((ins == INS_pmull) && (id->idInsOpt() == INS_OPTS_1D)) || - ((ins == INS_pmull2) && (id->idInsOpt() == INS_OPTS_2D))) + else { - // pmull Vd.1q, Vn.1d, Vm.1d - // pmull2 Vd.1q, Vn.2d, Vm.2d - printf("%s.1q, ", emitVectorRegName(id->idReg1())); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + code |= insEncodeReg3Scale(id->idReg3Scaled()); // S + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm } - else if (emitInsIsVectorNarrow(ins)) + dst += emitOutput_Instr(dst, code); + break; + + case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + // Is the target a vector register? + if (isVectorRegister(id->idReg1())) { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg3(), optWidenElemsizeArrangement(id->idInsOpt()), false); + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt + code |= insEncodeReg_Va(id->idReg2()); // aaaaa } else { - if (emitInsIsVectorLong(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } - else if (emitInsIsVectorWide(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); - } - else - { - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } - - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= 
insEncodeReg_Rt(id->idReg1()); // ttttt + code |= insEncodeReg_Ra(id->idReg2()); // aaaaa } + code |= insEncodeReg_Rn(id->idReg3()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) - if ((ins == INS_sdot) || (ins == INS_udot)) + case IF_LS_3C: // LS_3C X......PP.iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh) + assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); + imm = emitGetInsSC(id); + assert((imm >= -64) && (imm <= 63)); // signed 7 bits + imm &= 0x7f; // force into unsigned 7 bit representation + code = emitInsCode(ins, fmt); + // Is the target a vector register? + if (isVectorRegister(id->idReg1())) { - // sdot/udot Vd.2s, Vn.8b, Vm.4b[index] - // sdot/udot Vd.4s, Vn.16b, Vm.4b[index] - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - size = id->idOpSize(); - emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true); - index = emitGetInsSC(id); - printf("%s.4b[%d]", emitVectorRegName(id->idReg3()), (int)index); + code &= 0x3FFFFFFF; // clear the size bits + code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX + code |= insEncodeReg_Vt(id->idReg1()); // ttttt + code |= insEncodeReg_Va(id->idReg2()); // aaaaa } else { - if (emitInsIsVectorLong(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } - else if (emitInsIsVectorWide(ins)) - { - emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); - emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); - } - else - { - assert(!emitInsIsVectorNarrow(ins)); - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - } - - elemsize = optGetElemsize(id->idInsOpt()); - index = emitGetInsSC(id); - emitDispVectorRegIndex(id->idReg3(), elemsize, index, 
false); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rt(id->idReg1()); // ttttt + code |= insEncodeReg_Ra(id->idReg2()); // aaaaa } + code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP + code |= ((code_t)imm << 15); // iiiiiiiii + code |= insEncodeReg_Rn(id->idReg3()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_DV_3B: // DV_3B .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn + code = emitInsCode(ins, fmt); + // Arm64 store exclusive unpredictable cases + assert(id->idReg1() != id->idReg2()); + assert(id->idReg1() != id->idReg3()); + code |= insEncodeDatasizeLS(code, id->idOpSize()); // X + code |= insEncodeReg_Rm(id->idReg1()); // mmmmm + code |= insEncodeReg_Rt(id->idReg2()); // ttttt + code |= insEncodeReg_Rn(id->idReg3()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - switch (ins) + case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics + code = emitInsCode(ins, fmt); + code |= insEncodeDatasizeLS(code, id->idOpSize()); // X + code |= insEncodeReg_Rm(id->idReg1()); // mmmmm + code |= insEncodeReg_Rt(id->idReg2()); // ttttt + code |= insEncodeReg_Rn(id->idReg3()); // nnnnn + dst += emitOutput_Instr(dst, code); + + // Some instructions with this encoding return their result in the + // second operand register instead of the first so we special case + // the GC update here and skip the common path down below. 
+ if (emitInsDestIsOp2(ins)) { - case INS_tbl: - case INS_tbl_2regs: - case INS_tbl_3regs: - case INS_tbl_4regs: - case INS_tbx: - case INS_tbx_2regs: - case INS_tbx_3regs: - case INS_tbx_4regs: - registerListSize = insGetRegisterListSize(ins); - emitDispVectorRegList(id->idReg2(), registerListSize, INS_OPTS_16B, true); - break; - case INS_mov: - break; - default: - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - break; + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg2(), dst); + } + else + { + emitGCregDeadUpd(id->idReg2(), dst); + } + + goto SKIP_GC_UPDATE; } - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); - break; - case IF_DV_3BI: // DV_3BI .Q........Lmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - elemsize = optGetElemsize(id->idInsOpt()); - emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false); break; - case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, false); - break; + case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); - case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) - if (emitInsIsVectorLong(ins)) - { - emitDispReg(id->idReg1(), widenDatasize(size), true); - } - else - { - assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins)); - emitDispReg(id->idReg1(), size, true); - } + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeVLSElemsize(elemsize); // ss + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vt(id->idReg1()); // ttttt - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, false); + 
dst += emitOutput_Instr(dst, code); break; - case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - if (emitInsIsVectorLong(ins)) - { - emitDispReg(id->idReg1(), widenDatasize(size), true); - } - else - { - assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins)); - emitDispReg(id->idReg1(), size, true); - } - emitDispReg(id->idReg2(), size, true); + case IF_LS_3G: // LS_3G .Q.........mmmmm ...Sssnnnnnttttt Vt[] Rn Rm elemsize = id->idOpSize(); - index = emitGetInsSC(id); - emitDispVectorRegIndex(id->idReg3(), elemsize, index, false); - break; - - case IF_DV_3F: // DV_3F ..........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - if ((ins == INS_sha1c) || (ins == INS_sha1m) || (ins == INS_sha1p)) - { - // Qd, Sn, Vm (vector) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), EA_4BYTE, true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); - } - else if ((ins == INS_sha256h) || (ins == INS_sha256h2)) - { - // Qd Qn Vm (vector) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); - } - else // INS_sha1su0, INS_sha256su1 - { - // Vd, Vn, Vm (vector) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); - } - break; + index = id->idSmallCns(); + code = emitInsCode(ins, fmt); - case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - elemsize = size; - emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false); - break; + code |= insEncodeVLSIndex(elemsize, index); // Q xx S ss + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vt(id->idReg1()); // ttttt - case IF_DV_3G: // DV_3G .Q.........mmmmm 
.iiii.nnnnnddddd Vd Vn Vm imm (vector) - emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); - emitDispVectorReg(id->idReg3(), id->idInsOpt(), true); - emitDispImm(emitGetInsSC(id), false); + dst += emitOutput_Instr(dst, code); break; - case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) - emitDispReg(id->idReg1(), size, true); - emitDispReg(id->idReg2(), size, true); - emitDispReg(id->idReg3(), size, true); - emitDispReg(id->idReg4(), size, false); + case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh) + assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt())); + imm = emitGetInsSC(id); + assert(isValidUimm<12>(imm)); + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeShiftImm12(id->idInsOpt()); // sh + code |= ((code_t)imm << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_SN_0A: // SN_0A ................ ................ - if (ins == INS_align) - { - instrDescAlign* alignInstrId = (instrDescAlign*)id; - printf("[%d bytes", id->idIsEmptyAlign() ? 0 : INSTR_ENCODED_SIZE); - - // targetIG is only set for 1st of the series of align instruction - if ((alignInstrId->idaLoopHeadPredIG != nullptr) && (alignInstrId->loopHeadIG() != nullptr)) - { - printf(" for IG%02u", alignInstrId->loopHeadIG()->igNum); - } - printf("]"); - } + case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) + imm = emitGetInsSC(id); + assert(isValidImmHWVal(imm, id->idOpSize())); + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= ((code_t)imm << 5); // hwiiiii iiiiiiiiiii + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); break; - case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... 
imm16 - emitDispImm(emitGetInsSC(id), false); + case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s) + imm = emitGetInsSC(id); + assert(isValidImmNRS(imm, id->idOpSize())); + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 10); // Nrrrrrrssssss + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier - emitDispBarrier((insBarrier)emitGetInsSC(id)); + case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) + imm = emitGetInsSC(id); + assert(isValidImmNRS(imm, id->idOpSize())); + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 10); // Nrrrrrrssssss + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) - if (ins == INS_mrs_tpid0) + case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 + case IF_LARGEADR: + assert(insOptsNone(id->idInsOpt())); + if (id->idIsReloc()) { - emitDispReg(id->idReg1(), size, true); - printf("tpidr_el0"); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); + emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21 + : IMAGE_REL_ARM64_PAGEBASE_REL21); } else { - emitDispReg(id->idReg1(), size, false); + // Local jmp/load case which does not need a relocation. + assert(id->idIsBound()); + dst = emitOutputLJ(ig, dst, id); } + sz = sizeof(instrDescJmp); break; - // ., /M, ., . - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_AC_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer divide vectors (predicated) - case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left - // (predicated) - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - // .D, /M, .D, .D - case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - // .H, /M, .H, .H - case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; - - // ., /, . - case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) - { - PredicateType ptype = (id->idPredicateReg2Merge()) ? 
PREDICATE_MERGE : PREDICATE_ZERO; - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // nnnnn - emitDispLowPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // ddddd + case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond + imm = emitGetInsSC(id); + assert(isValidImmCondFlagsImm5(imm)); + { + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= ((code_t)cfi.imm5 << 16); // iiiii + code |= insEncodeFlags(cfi.flags); // nzcv + code |= insEncodeCond(cfi.cond); // cccc + dst += emitOutput_Instr(dst, code); + } break; - } - // ., /M, ., # - case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImm(emitGetInsSC(id), false); // iiii - break; - - // ., /M, ., .D - case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm - break; - - // ., /M, ., . - // ., /M, ., . 
- case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend - // (predicated) - case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand - // (predicated) - case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - // ., /Z, ., . - case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); - break; - - // ., ., . - case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high - // (unpredicated) - case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved - case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute - case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient - case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments - case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads - case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp - case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) - // ., ., . 
- case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high - case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate - // .Q, .Q, .Q - case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments - // .D, .D, .D - case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - // .D, .D, .D - case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) - // .B, .B, .B - case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) - case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - // .D, .D, .D - // .S, .S, .S - case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations - // .H, .H, .H - case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn/mmmmm - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa - break; - - // .D, .D, .D - case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) - // .D, .D, .D - case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd - emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_D, true); // nnnnn - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm - break; - - // .D, .D, .D, .D - case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations - 
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // kkkkk - break; - - // ., #, # - case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate - // increment) - { - ssize_t imm1; - ssize_t imm2; - insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImm(imm1, true); // iiiii - emitDispImm(imm2, false); // iiiii - break; - } + case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) + assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt())); + imm = emitGetInsSC(id); + assert(isValidUimm<12>(imm)); + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeShiftImm12(id->idInsOpt()); // sh + code |= ((code_t)imm << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); - // ., #, - case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register - // increment) + if (id->idIsReloc()) { - const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImm(emitGetInsSC(id), true); // iiiii - emitDispReg(id->idReg2(), intRegSize, false); // mmmmm - break; + assert(sz == sizeof(instrDesc)); + assert(id->idAddr()->iiaAddr != nullptr); + emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? 
IMAGE_REL_AARCH64_TLSDESC_ADD_LO12 + : IMAGE_REL_ARM64_PAGEOFFSET_12A); } + break; - // ., , # - case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate - // increment) - { - const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispReg(id->idReg2(), intRegSize, true); // mmmmm - emitDispImm(emitGetInsSC(id), false); // iiiii - break; - } + case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // Reg2 also in mmmmm + code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss + dst += emitOutput_Instr(dst, code); + break; - // .H, .B, .B - case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long - case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) + imm = emitGetInsSC(id); + assert(isValidImmNRS(imm, id->idOpSize())); + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 10); // Nrrrrrrssssss + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // ., ., . - // ., {.}, . 
- case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - if (id->idIns() == INS_sve_tbl) - { - emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn - } - else + case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s) + if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr) { - assert(id->idIns() == INS_sve_tbx); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - } - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + + // Shift immediates are aliases of the SBFM/UBFM instructions + // that actually take 2 registers and 2 constants, + // Since we stored the shift immediate value + // we need to calculate the N,R and S values here. + + bitMaskImm bmi; + bmi.immNRS = 0; + + bmi.immN = (size == EA_8BYTE) ? 1 : 0; + bmi.immR = imm; + bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f; + + // immR and immS are now set correctly for INS_asr and INS_lsr + // but for INS_lsl we have to adjust the values for immR and immS + // + if (ins == INS_lsl) + { + bmi.immR = -imm & bmi.immS; + bmi.immS = bmi.immS - imm; + } - // ., ., . - // ., {.}, . - case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - if (id->idIns() == INS_sve_tblq) - { - emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn + // setup imm with the proper 13 bit value N:R:S + // + imm = bmi.immNRS; } else { - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + // The other instructions have already have encoded N,R and S values + imm = emitGetInsSC(id); } - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; - - // ., {., .}, . 
- case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveConsecutiveRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; + assert(isValidImmNRS(imm, id->idOpSize())); - // ., , - case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register - // increment) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispReg(id->idReg2(), size, true); // nnnnn - emitDispReg(id->idReg3(), size, false); // mmmmm + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 10); // Nrrrrrrssssss + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // {, {, MUL #}} - case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count - // {, {, MUL #}} - case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + case IF_DR_1D: // DR_1D X............... 
cccc.......ddddd Rd cond imm = emitGetInsSC(id); - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp - if (imm > 1) + assert(isValidImmCond(imm)); { - printf("mul "); - emitDispImm(imm, false, false); // iiii + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeInvertedCond(cfi.cond); // cccc + dst += emitOutput_Instr(dst, code); } break; - // .D{, {, MUL #}} - // .H{, {, MUL #}} - // .S{, {, MUL #}} - case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count - case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp - if (imm > 1) - { - printf("mul "); - emitDispImm(imm, false, false); // iiii - } + case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // ., ., # - case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - - FALLTHROUGH; - // ., # - case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - bmi.immNRS = (unsigned)emitGetInsSC(id); - imm = emitDecodeBitMaskImm(bmi, optGetSveElemsize(id->idInsOpt())); - emitDispImm(imm, false); // iiiiiiiiiiiii + case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... 
Rn Rm {LSL,LSR,ASR,ROR} imm(0-63) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeShiftType(id->idInsOpt()); // sh + code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // , {, {, MUL #}} - // {, {, MUL #}} - // {, {, MUL #}} - case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count - switch (id->idIns()) - { - case INS_sve_sqincb: - case INS_sve_sqdecb: - case INS_sve_sqinch: - case INS_sve_sqdech: - case INS_sve_sqincw: - case INS_sve_sqdecw: - case INS_sve_sqincd: - case INS_sve_sqdecd: - emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd - - if (size == EA_4BYTE) - { - emitDispReg(id->idReg1(), EA_4BYTE, true); - } - break; - - default: - emitDispReg(id->idReg1(), size, true); // ddddd - break; - } + case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... 
Rn Rm ext(Rm) LSL imm(0-4) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert((imm >= 0) && (imm <= 4)); // imm [0..4] + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeExtend(id->idInsOpt()); // ooo + code |= insEncodeExtendScale(imm); // sss + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond imm = emitGetInsSC(id); - emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp - if (imm > 1) + assert(isValidImmCond(imm)); { - printf("mul "); - emitDispImm(imm, false, false); // iiii + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + code |= insEncodeInvertedCond(cfi.cond); // cccc + dst += emitOutput_Instr(dst, code); } break; - // .B, {.B, .B }, # - case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispVectorRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn - emitDispImm(imm, false); // iiiii iii - break; - - // .B, .B, .B, # - case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm - emitDispImm(imm, false); // iiiii iii - break; - - // ., /M, # - case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - 
emitDispPredicateReg(id->idReg2(), insGetPredicateType(id->idInsFmt()), INS_OPTS_NONE, true); // gggg - emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii - break; - - // ., ., .D - case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm - break; - - // ., [., .{, }] - case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - printf("["); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id) > 0); - emitDispSveExtendOptsModN(INS_OPTS_LSL, emitGetInsSC(id)); - printf("]"); - break; - - // .D, [.D, .D, SXTW{ }] - case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - printf("["); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispSveExtendOptsModN(INS_OPTS_SXTW, emitGetInsSC(id)); - printf("]"); - break; - - // .D, [.D, .D, UXTW{ }] - case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - printf("["); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispSveExtendOptsModN(INS_OPTS_UXTW, emitGetInsSC(id)); - printf("]"); - break; - - // ., - case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false); // mmmmm - break; - - // ., - case IF_SVE_CD_2A: // ........xx...... 
......mmmmmddddd -- SVE insert general register - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispReg(id->idReg2(), id->idInsOpt() == INS_OPTS_SCALABLE_D ? EA_8BYTE : EA_4BYTE, false); // mmmmm - break; - - // .H, .B - case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN + case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // ., , ., . - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeShiftType(id->idInsOpt()); // sh + code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // , , , . - // , , , . - case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register - case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_2G: // DR_2G X............... .....xnnnnnddddd Rd Rn + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + if (ins == INS_rev) + { + if (size == EA_8BYTE) + { + code |= 0x00000400; // x - bit at location 10 + } + } + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // , , . - // , , . - case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) - case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) - case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register - case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register - case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn + code = emitInsCode(ins, fmt); + code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // ., , . - case IF_SVE_AG_3A: // ........xx...... 
...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) - case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) - case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) - case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) - emitDispVectorReg(id->idReg1(), optSveToQuadwordElemsizeArrangement(id->idInsOpt()), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond + imm = emitGetInsSC(id); + assert(isValidImmCondFlags(imm)); + { + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rn(id->idReg1()); // nnnnn + code |= insEncodeReg_Rm(id->idReg2()); // mmmmm + code |= insEncodeFlags(cfi.flags); // nzcv + code |= insEncodeCond(cfi.cond); // cccc + dst += emitOutput_Instr(dst, code); + } break; - //
, , . - case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) - emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + if (id->idIsLclVar()) + { + code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm + } + else + { + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + } + dst += emitOutput_Instr(dst, code); break; - // ., /M, . - case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) - case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) - case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements - case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) - case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value - case IF_SVE_HR_3A: // ........xx...... 
...gggnnnnnddddd -- SVE floating-point unary operations - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeShiftType(id->idInsOpt()); // sh + code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss + dst += emitOutput_Instr(dst, code); break; - case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, true); // DDDD - emitDispSveReg(id->idReg2(), false); // nnnnn - break; - case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, true); // DDDD - emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn - break; - case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true); // DDDD - emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn - break; - case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, true); // DDDD - emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn - break; - case IF_SVE_CF_2A: // ................ 
.......NNNNddddd -- SVE move predicate into vector - emitDispSveReg(id->idReg1(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN - break; - case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector - emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, false); // NNNN - break; - case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector - emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, false); // NNNN - break; - case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector - emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, false); // NNNN + case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert((imm >= 0) && (imm <= 4)); // imm [0..4] + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeExtend(id->idInsOpt()); // ooo + code |= insEncodeExtendScale(imm); // sss + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // ., ., . 
- case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // NNNN - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM + case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond + imm = emitGetInsSC(id); + assert(isValidImmCond(imm)); + { + condFlagsImm cfi; + cfi.immCFVal = (unsigned)imm; + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeCond(cfi.cond); // cccc + dst += emitOutput_Instr(dst, code); + } break; - // ., , . - case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert(isValidImmShift(imm, id->idOpSize())); + code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss + dst += emitOutput_Instr(dst, code); break; - // ., /M, - case IF_SVE_CP_3A: // ........xx...... 
...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector - // (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispReg(id->idReg3(), size, false); // mmmmm + case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra + code = emitInsCode(ins, fmt); + code |= insEncodeDatasize(id->idOpSize()); // X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + code |= insEncodeReg_Rm(id->idReg3()); // mmmmm + code |= insEncodeReg_Ra(id->idReg4()); // aaaaa + dst += emitOutput_Instr(dst, code); break; - // ., /M, - case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispReg(encodingZRtoSP(id->idReg3()), size, false); // mmmmm + case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(elemsize); // X + code |= ((code_t)imm << 13); // iiiii iii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); break; - // .Q, /M, .Q - case IF_SVE_CT_3A: // ................ 
...gggnnnnnddddd -- SVE reverse doublewords - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_Q, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_Q, false); // nnnnn - break; + case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector) + imm = emitGetInsSC(id) & 0x0ff; + immShift = (emitGetInsSC(id) & 0x700) >> 8; + elemsize = optGetElemsize(id->idInsOpt()); + cmode = 0; + switch (elemsize) + { // cmode + case EA_1BYTE: + cmode = 0xE; // 1110 + break; + case EA_2BYTE: + cmode = 0x8; + cmode |= (immShift << 1); // 10x0 + break; + case EA_4BYTE: + if (immShift < 4) + { + cmode = 0x0; + cmode |= (immShift << 1); // 0xx0 + } + else // MSL + { + cmode = 0xC; + if (immShift & 2) + cmode |= 1; // 110x + } + break; + case EA_8BYTE: + cmode = 0xE; // 1110 + break; + default: + unreached(); + break; + } - // ., , {., .} - case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV - emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + if ((ins == INS_fmov) || (ins == INS_movi)) + { + if (elemsize == EA_8BYTE) + { + code |= 0x20000000; // X + } + } + if (ins != INS_fmov) + { + assert((cmode >= 0) && (cmode <= 0xF)); + code |= (cmode << 12); // cmod + } + code |= (((code_t)imm >> 5) << 16); // iii + code |= (((code_t)imm & 0x1f) << 5); // iiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); break; - // ., , ., . - case IF_SVE_CV_3B: // ........xx...... 
...VVVmmmmmddddd -- SVE vector splice (destructive) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vn(id->idReg1()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // MOV ., /M, . or SEL ., , ., . - case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) - { - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - - if (id->idIns() == INS_sve_mov) + case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector) + case IF_DV_2R: // DV_2R .Q.......X...... ......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + if ((ins == INS_fcvtl) || (ins == INS_fcvtl2) || (ins == INS_fcvtn) || (ins == INS_fcvtn2)) { - emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, id->idInsOpt(), true); // VVVV - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // nnnnn + // fcvtl{2} and fcvtn{2} encode the element size as + // esize = 16 << UInt(sz) + if (elemsize == EA_4BYTE) + { + code |= 0x00400000; // X + } + else + { + assert(elemsize == EA_2BYTE); + } } else { - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // VVVV - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm + code |= insEncodeFloatElemsize(elemsize); // X } + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += 
emitOutput_Instr(dst, code); break; - } - - // ., /Z, ., . - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match - case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm - break; - - // ., /Z, ., .D - case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // mmmmm - break; - - // ., /Z, ., # - case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate - case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn - emitDispImm(emitGetInsSC(id), false, (fmt == IF_SVE_CY_3B)); // iiiii - break; - - // .S, .H, .H[] - case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - case 
IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) - // .S, .B, .B[] - case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) - case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) - case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - // .S, .H, .H[] - case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - // .S, .S, .S[] - case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(emitGetInsSC(id), false); // ii/iii - break; - - // .S, .H, .H - case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product - case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long - case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product - case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 
multiply-add long long - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm - break; - - // .S, .B, .B - case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd - emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_B, true); // nnnnn - emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_B, false); // mmmmm - break; - - // .D, .S, .S[] - case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - // .D, .S, .S[] - case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm - emitDispElementIndex(emitGetInsSC(id), false); // ii - break; - - // .D, .H, .H[] - case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(emitGetInsSC(id), false); // ii - break; - - // .H, .B, .B[] - case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(emitGetInsSC(id), false); 
// iii - break; - - // .H, .H, .H[] - case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - // .S, .S, .S[] - case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - // .D, .D, .D[] - case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - // .D, .D, .D[] - case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - // .H, .H, .H[] - case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(emitGetInsSC(id), false); // i/ii/iii - break; - - // .B, /Z, .B, .B - case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - { - bool isFourReg = - !((ins == INS_sve_mov) || (ins == INS_sve_movs) || (ins == INS_sve_not) || (ins == INS_sve_nots)); - 
PredicateType ptype = (ins == INS_sve_sel) ? PREDICATE_NONE : insGetPredicateType(fmt, 2); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), isFourReg); // NNNN - if (isFourReg) + case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general) + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + datasize = (elemsize == EA_8BYTE) ? EA_16BYTE : EA_8BYTE; + if (ins == INS_smov) { - emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM + datasize = EA_16BYTE; } - - break; - } - - // .B, .B - case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN - break; - - // .B, /M, .B - case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN - break; - - // .B, /Z, .B, .B - case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition - { - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true); // 
NNNN - emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM - break; - } - - // .B, /, .B - case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition - case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition - { - PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO; - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN - break; - } - - // .B, /Z, .B, .B - case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true); // NNNN - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM - break; - - // .B, , .B - case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(datasize); // Q + code |= insEncodeVectorIndex(elemsize, index); // iiiii + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .{, } - case IF_SVE_DE_1A: // ........xx...... 
......ppppp.DDDD -- SVE predicate initialize - { - bool dispPattern = (id->idSvePattern() != SVE_PATTERN_ALL); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), dispPattern); // DDDD - if (dispPattern) + case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) + if (ins == INS_dup) + { + datasize = id->idOpSize(); + elemsize = optGetElemsize(id->idInsOpt()); + index = 0; + } + else // INS_ins { - emitDispSvePattern(id->idSvePattern(), false); // ppppp + datasize = EA_16BYTE; + elemsize = id->idOpSize(); + index = emitGetInsSC(id); } + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(datasize); // Q + code |= insEncodeVectorIndex(elemsize, index); // iiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - } - // ., . - case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN + case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) + index = emitGetInsSC(id); + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeVectorIndex(elemsize, index); // iiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // ., , . - case IF_SVE_DF_2A: // ........xx...... 
.......VVVV.DDDD -- SVE predicate next active - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // VVVV - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD + case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) + index = emitGetInsSC(id); + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorIndex(elemsize, index); // iiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .B, /Z - case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated) - case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // gggg + case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) + elemsize = id->idOpSize(); + imm = emitGetInsSC(id); + index = (imm >> 4) & 0xf; + index2 = imm & 0xf; + code = emitInsCode(ins, fmt); + code |= insEncodeVectorIndex(elemsize, index); // iiiii + code |= insEncodeVectorIndex2(elemsize, index2); // jjjj + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .B - case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) - case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDDD + case IF_DV_2G: // DV_2G .........X...... 
......nnnnnddddd Vd Vn (fmov, fcvtXX - register) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // , , . - case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count - emitDispReg(id->idReg1(), size, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN + case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // ., /M, . - case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), (insOpts)((unsigned)id->idInsOpt() - 1), false); // mmmmm + case IF_DV_2I: // DV_2I X........X...... 
......nnnnnddddd Vd Rn (fmov - from general) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Rn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .H, { .S-.S }, # - case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn - emitDispImm(emitGetInsSC(id), false); // iiii + case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) + code = emitInsCode(ins, fmt); + code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // , ., - case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) - emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // NNNN - emitDispVectorLengthSpecifier(id); + case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vn(id->idReg1()); // nnnnn + code |= insEncodeReg_Vm(id->idReg2()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // , . - case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count - emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM + case IF_DV_2L: // DV_2L ........XX...... 
......nnnnnddddd Vd Vn (abs, neg - scalar) + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // ., . - case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count - case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM + case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector) + case IF_DV_2T: // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, + // umaxv, uminv) + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // , ., - // , . - case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count - if ((ins == INS_sve_sqdecp) || (ins == INS_sve_sqincp)) - { - // 32-bit result: , ., - // 64-bit result: , . 
- const bool is32BitResult = (id->idOpSize() == EA_4BYTE); // X - emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), is32BitResult); // MMMM + case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); + break; - if (is32BitResult) - { - emitDispReg(id->idReg1(), EA_4BYTE, false); - } - } - else - { - assert((ins == INS_sve_uqdecp) || (ins == INS_sve_uqincp)); - emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM - } + case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector) + imm = emitGetInsSC(id); + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeVectorShift(elemsize, emitInsIsVectorRightShift(ins), imm); // iiiiiii + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // none - case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise + case IF_DV_2P: // DV_2P ............... ......nnnnnddddd Vd Vn (aes*, sha1su1) + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .B - case IF_SVE_DR_1A: // ................ .......NNNN..... 
-- SVE FFR write from predicate - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // NNNN + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Vd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // , - case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars - emitDispReg(id->idReg1(), id->idOpSize(), true); // nnnnn - emitDispReg(id->idReg2(), id->idOpSize(), false); // mmmmm + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + elemsize = optGetElemsize(id->idInsOpt()); + code = emitInsCode(ins, fmt); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - // .H, {.S-.S } - case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow - { - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); - emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, false); + case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn (sha1h) + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + dst += emitOutput_Instr(dst, code); break; - } - // .B, {.H-.H } - case IF_SVE_HG_2A: // ................ 
......nnnn.ddddd -- SVE2 FP8 downconverts - { - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_B, true); - emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_H, false); + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + code = emitInsCode(ins, fmt); + elemsize = optGetElemsize(id->idInsOpt()); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - // ., . - case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), optWidenSveElemsizeArrangement(id->idInsOpt()), false); // nnnnn + case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + elemsize = optGetElemsize(id->idInsOpt()); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // , , # - case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment - { - const regNumber reg1 = (id->idReg1() == REG_ZR) ? REG_SP : id->idReg1(); - const regNumber reg2 = (id->idReg2() == REG_ZR) ? 
REG_SP : id->idReg2(); - emitDispReg(reg1, id->idOpSize(), true); // ddddd - emitDispReg(reg2, id->idOpSize(), true); // nnnnn - emitDispImm(emitGetInsSC(id), false); // iiiiii + case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + code = emitInsCode(ins, fmt); + elemsize = optGetElemsize(id->idInsOpt()); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - // , # - case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size - emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd - emitDispImm(emitGetInsSC(id), false); // iiiiii + case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + elemsize = optGetElemsize(id->idInsOpt()); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeFloatIndex(elemsize, imm); // L H + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // ., ., # - case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long - { - const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); - emitDispSveReg(id->idReg1(), largeSizeSpecifier, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispImm(emitGetInsSC(id), false); // iii + case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + 
code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - // ., ., # - case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow - { - const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), largeSizeSpecifier, true); // nnnnn - emitDispImm(emitGetInsSC(id), false); // iii + case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + code = emitInsCode(ins, fmt); + code |= insEncodeFloatElemsize(id->idOpSize()); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - // ., ., ., - case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add - { - // Rotation bit implies rotation is 270 if set, else rotation is 90 - const ssize_t rot = emitDecodeRotationImm90_or_270(emitGetInsSC(id)); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm - emitDispImm(rot, false); // r + case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeFloatIndex(elemsize, imm); // L H + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - // ., ., . 
- case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + code = emitInsCode(ins, fmt); + elemsize = id->idOpSize(); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // .B, .B, .B - // .S, .S, .S - case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // mmmmm + case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(isValidVectorIndex(EA_16BYTE, elemsize, imm)); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // .B, .B - case IF_SVE_GL_1A: // ................ 
...........ddddd -- SVE2 crypto unary operations - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd + case IF_DV_3F: // DV_3F ...........mmmmm ......nnnnnddddd Vd Vn Vm (vector) - source dest regs overlap + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - // ., , - case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit - // ., , - case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDDD - emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn - emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm + case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) + imm = emitGetInsSC(id); + code = emitInsCode(ins, fmt); + code |= insEncodeVectorsize(id->idOpSize()); // Q + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + code |= ((code_t)imm << 11); // iiii + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); break; - // , , .[, ] - case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // NNNN - emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM - printf("["); - emitDispReg(id->idReg4(), EA_4BYTE, true); // vv - emitDispImm(emitGetInsSC(id), false); // ix xx - printf("]"); + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) + code = emitInsCode(ins, 
fmt); + elemsize = id->idOpSize(); + code |= insEncodeFloatElemsize(elemsize); // X + code |= insEncodeReg_Vd(id->idReg1()); // ddddd + code |= insEncodeReg_Vn(id->idReg2()); // nnnnn + code |= insEncodeReg_Vm(id->idReg3()); // mmmmm + code |= insEncodeReg_Va(id->idReg4()); // aaaaa + dst += emitOutput_Instr(dst, code); break; - // ., [] - case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD - emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN - emitDispElementIndex(emitGetInsSC(id), false); // ii - break; + case IF_SN_0A: // SN_0A ................ ................ + { + bool skipIns = false; +#if FEATURE_LOOP_ALIGN + if (id->idIns() == INS_align) + { + // IG can be marked as not needing alignment after emitting align instruction. + // Alternatively, there are fewer align instructions needed than emitted. + // If that is the case, skip outputting alignment. + if (!ig->endsWithAlignInstr() || id->idIsEmptyAlign()) + { + skipIns = true; + } - // {., .}, [] - case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - emitDispPredicateRegPair(id->idReg1(), id->idInsOpt()); // DDDD - emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN - emitDispElementIndex(emitGetInsSC(id), false); // i - break; +#ifdef DEBUG + if (!ig->endsWithAlignInstr()) + { + // Validate if the state is correctly updated + assert(id->idIsEmptyAlign()); + } +#endif + sz = sizeof(instrDescAlign); + ins = INS_nop; - // {., .}, , - case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate - // pair) - emitDispLowPredicateRegPair(id->idReg1(), id->idInsOpt()); - emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn - emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm - break; +#ifdef DEBUG + // Under STRESS_EMITTER, if this is the 'align' before the 'jmp' instruction, + // then add "bkpt" instruction. + instrDescAlign* alignInstr = (instrDescAlign*)id; - // ., , , - case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit - // (predicate-as-counter) - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDD - emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn - emitDispReg(id->idReg3(), id->idOpSize(), true); // mmmmm - emitDispVectorLengthSpecifier(id); - break; + if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 50) && alignInstr->isPlacedAfterJmp && + !skipIns) + { + // There is no good way to squeeze in "bkpt" as well as display it + // in the disassembly because there is no corresponding instrDesc for + // it. As such, leave it as is, the "0xD43E0000" bytecode will be seen + // next to the nop instruction in disasm. + // e.g. D43E0000 align [4 bytes for IG07] + ins = INS_BREAKPOINT; + fmt = IF_SI_0A; + } +#endif + } +#endif // FEATURE_LOOP_ALIGN - // PTRUE . - case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDD - break; + if (!skipIns) + { + code = emitInsCode(ins, fmt); + dst += emitOutput_Instr(dst, code); + } - // FDUP ., # - // FMOV ., # - case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii break; + } - // DUP ., #{, } - // MOV ., #{, } - case IF_SVE_EB_1A: // ........xx...... 
..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) - { + case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16 imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImmOptsLSL(imm, id->idHasShift(), 8); // h iiiiiiii + assert(isValidUimm<16>(imm)); + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii + dst += emitOutput_Instr(dst, code); break; - } - // ADD ., ., #{, } - // SQADD ., ., #{, } - // UQADD ., ., #{, } - // SUB ., ., #{, } - // SUBR ., ., #{, } - // SQSUB ., ., #{, } - // UQSUB ., ., #{, } - case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) - { + case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImmOptsLSL(imm, id->idHasShift(), 8); // h iiiiiiii + assert((imm >= 0) && (imm <= 15)); + code = emitInsCode(ins, fmt); + code |= ((code_t)imm << 8); // bbbb + dst += emitOutput_Instr(dst, code); break; - } - // FMOV ., #0.0 - // (Preferred disassembly: FMOV ., #0) - case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImm(0, false); - break; - - // SMAX ., ., # - // SMIN ., ., # - // UMAX ., ., # - // UMIN ., ., # - case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) - // MUL ., ., # - case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispImm(emitGetInsSC(id), false); // iiiiiiii - break; - - // ., ., . 
- case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) - // .S, .B, .B - case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate - { - const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn - emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rt(id->idReg1()); // ttttt + dst += emitOutput_Instr(dst, code); break; - } - // ., ., . - case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long - case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long - case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long - case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long - // ., ., . - case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long - case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long - case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long - // .Q, .D, .D - case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long - { - const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn - emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + default: + dst = emitOutput_InstrSve(dst, id); break; - } + } - // ., ., . 
- case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part + // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. + // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a + // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as + // for stores, but we ignore those cases here.) + if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref. + { + // We assume that "idReg1" is the primary destination register for all instructions + assert(!emitInsDestIsOp2(ins)); + if (id->idGCref() != GCT_NONE) { - const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), largeSizeSpecifier, true); // nnnnn - emitDispSveReg(id->idReg3(), largeSizeSpecifier, false); // mmmmm - break; + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); } - - // ., ., . 
- case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + else { - const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm - break; + emitGCregDeadUpd(id->idReg1(), dst); } - // CDOT ., ., ., - case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + if (emitInsMayWriteMultipleRegs(id)) { - const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn - emitDispSveReg(id->idReg3(), smallSizeSpecifier, true); // mmmmm - - // rot specifies a multiple of 90-degree rotations - emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr - break; + // INS_ldp etc... + // "idReg2" is the secondary destination register + if (id->idGCrefReg2() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); + } + else + { + emitGCregDeadUpd(id->idReg2(), dst); + } } + } - // CMLA ., ., ., - // SQRDCMLAH ., ., ., - case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm - - // rot specifies a multiple of 90-degree rotations - emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr - break; - - // CDOT .S, .B, .B[], - case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) +SKIP_GC_UPDATE: + // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC + // ref or overwritten one. 
+ if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id)) + { + int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); + unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); + bool FPbased; + int adr = emitComp->lvaFrameAddress(varNum, &FPbased); + if (id->idGCref() != GCT_NONE) { - const ssize_t imm = emitGetInsSC(id); - const ssize_t rot = (imm & 0b11); - const ssize_t index = (imm >> 2); - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(index, true); // ii - - // rot specifies a multiple of 90-degree rotations - emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr - break; + emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum)); } - - // CDOT .D, .H, .H[], - case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + else { - const ssize_t imm = emitGetInsSC(id); - const ssize_t rot = (imm & 0b11); - const ssize_t index = (imm >> 2); - emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(index, true); // i - - // rot specifies a multiple of 90-degree rotations - emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr - break; + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. 
+ vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + { + emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum)); + } } - - // CMLA .H, .H, .H[], - case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - // CMLA .S, .S, .S[], - case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - // SQRDCMLAH .H, .H, .H[], - case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - // SQRDCMLAH .S, .S, .S[], - case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - // FCMLA .S, .S, .S[], - case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) + if (emitInsWritesToLclVarStackLocPair(id)) { - const ssize_t imm = emitGetInsSC(id); - const ssize_t rot = (imm & 0b11); - const ssize_t index = (imm >> 2); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm - emitDispElementIndex(index, true); // i + int varNum2 = varNum; + int adr2 = adr; + unsigned ofs2 = ofs; + unsigned ofs2Dist; - // rot specifies a multiple of 90-degree rotations - emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr - break; - } + if (id->idIsLclVarPair()) + { + bool FPbased2; - // .H, /M, .S - // .S, /M, .D - // .D, /M, .S - // .S, /M, .H - // .D, /M, .D - // .S, /M, .S - // .D, /M, .H - // .H, /M, .H - // .H, /M, .D - // .H, /M, .S - case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements - case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision - case IF_SVE_HO_3B: // ................ 
...gggnnnnnddddd -- SVE floating-point convert precision - case IF_SVE_HO_3C: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision - case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer - case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point - { - insOpts opt = id->idInsOpt(); + emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id); + varNum2 = lclVarAddr2->lvaVarNum(); + ofs2 = lclVarAddr2->lvaOffset(); - switch (ins) + // If there are 2 GC vars in this instrDesc, get the 2nd variable + // that should be tracked. + adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2); + ofs2Dist = EA_SIZE_IN_BYTES(size); +#ifdef DEBUG + assert(FPbased == FPbased2); + if (FPbased) + { + assert(id->idReg3() == REG_FP); + } + else + { + assert(id->idReg3() == REG_SP); + } + assert(varNum2 != -1); +#endif // DEBUG + } + else { - // These cases have only one combination of operands so the option may be omitted. - case INS_sve_fcvtxnt: - opt = INS_OPTS_D_TO_S; - break; - case INS_sve_bfcvtnt: - opt = INS_OPTS_S_TO_H; - break; - case INS_sve_fcvtx: - opt = INS_OPTS_D_TO_S; - break; - case INS_sve_bfcvt: - opt = INS_OPTS_S_TO_H; - break; - default: - break; + ofs2Dist = TARGET_POINTER_SIZE; + ofs2 += ofs2Dist; } - insOpts dst = INS_OPTS_NONE; - insOpts src = INS_OPTS_NONE; - optExpandConversionPair(opt, dst, src); - - emitDispSveReg(id->idReg1(), dst, true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), src, false); // nnnnn - break; - } + ofs2 = AlignDown(ofs2, ofs2Dist); - // { .D }, /Z, [{, #, MUL VL}] - // Some of these formats may allow changing the element size instead of using 'D' for all instructions. 
- case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) - case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus - // immediate) - // { .B }, /Z, [{, #}] - // { .H }, /Z, [{, #}] - // { .S }, /Z, [{, #}] - // { .D }, /Z, [{, #}] - case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus - // immediate) - // { .Q, .Q }, /Z, [{, #, MUL VL}] - // { .Q, .Q, .Q }, /Z, [{, #, MUL VL}] - // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] - case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // immediate) - // { .B, .B }, /Z, 
[{, #, MUL VL}] - // { .H, .H }, /Z, [{, #, MUL VL}] - // { .S, .S }, /Z, [{, #, MUL VL}] - // { .D, .D }, /Z, [{, #, MUL VL}] - // { .B, .B, .B }, /Z, [{, #, MUL VL}] - // { .H, .H, .H }, /Z, [{, #, MUL VL}] - // { .S, .S, .S }, /Z, [{, #, MUL VL}] - // { .D, .D, .D }, /Z, [{, #, MUL VL}] - // { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] - // { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] - // { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] - // { .D, .D, .D, .D }, /Z, [{, #, MUL VL}] - case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) - // { .Q, .Q }, , [{, #, MUL VL}] - // { .Q, .Q, .Q }, , [{, #, MUL VL}] - // { .Q, .Q, .Q, .Q }, , [{, #, MUL VL}] - case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus - // immediate) - // { .B }, , [{, #, MUL VL}] - // { .H }, , [{, #, MUL VL}] - // { .S }, , [{, #, MUL VL}] - // { .D }, , [{, #, MUL VL}] - case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // immediate) - // { .D }, , [{, #, MUL VL}] - // { .Q }, , [{, #, MUL VL}] - case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - // { .B, .B }, , [{, #, MUL VL}] - // { .H, .H }, , [{, #, MUL VL}] - // { .S, .S }, , [{, #, MUL VL}] - // { .D, .D }, , [{, #, MUL VL}] - // { .B, .B, .B }, , [{, #, MUL VL}] - // { .H, .H, .H }, , [{, #, MUL VL}] - // { .S, .S, .S }, , [{, #, MUL VL}] - // { .D, .D, .D }, , [{, #, MUL VL}] - // { .B, .B, .B, .B }, , [{, #, MUL VL}] - // { .H, .H, .H, .H }, , [{, #, MUL VL}] - // { .S, .S, .S, .S }, , [{, #, MUL VL}] - // { .D, .D, .D, .D }, , [{, #, MUL VL}] - case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) - imm = emitGetInsSC(id); - 
emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn - if (imm != 0) + if (id->idGCrefReg2() != GCT_NONE) { - switch (fmt) +#ifdef DEBUG + if (id->idGCref() != GCT_NONE) { - case IF_SVE_IO_3A: - // This does not have to be printed as hex. - // We only do it because the capstone disassembly displays this immediate as hex. - // We could not modify capstone without affecting other cases. - emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii - break; - - case IF_SVE_IQ_3A: - case IF_SVE_IS_3A: - case IF_SVE_JE_3A: - case IF_SVE_JO_3A: - // This does not have to be printed as hex. - // We only do it because the capstone disassembly displays this immediate as hex. - // We could not modify capstone without affecting other cases. - emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii - printf("mul vl"); - break; - - default: - emitDispImm(emitGetInsSC(id), true); // iiii - printf("mul vl"); - break; + // If 1st register was a gc-var, then make sure the offset + // are correctly set for the 2nd register that is holding + // another gc-var. + assert((adr + ofs + ofs2Dist) == (adr2 + ofs2)); + } +#endif + emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum2 >= 0) + { + // "Regular" (non-spill-temp) local. 
+ vt = var_types(emitComp->lvaTable[varNum2].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + { + emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2)); } } - printf("]"); - break; + } + } - // {.}, , [, ] - // {.}, , [, , LSL #1] - case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - // {.}, , [, , LSL #2] - case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - // {.D }, , [, .D, #3] - // {.S }, , [, .S, #1] - // {.S }, , [, .S, #2] - case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D, ] - // {.D }, , [, .D, #1] - // {.D }, , [, .D, #2] - case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D, ] - case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.S }, , [, .S, ] - case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D, ] - case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - // {.S }, , [, .S, ] - case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit - // unscaled offsets) - // {.D }, /Z, [, .D, ] - // {.S }, /Z, [, .S, #1] - // {.S }, /Z, [, .S, #2] - case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - // {.S }, /Z, [, .S, ] - // {.D }, /Z, [, .D, #1] - // {.D }, /Z, [, .D, #2] - case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - 
// {.D }, /Z, [, .D, ] - case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - // {.S }, /Z, [, .S, ] - case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - // {.D }, /Z, [, .D, #2] - // {.D }, /Z, [, .D, #3] - case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.D }, /Z, [, .D, ] - case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.D }, /Z, [, .D, ] - case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.D }, /Z, [, .D] - // {.D }, /Z, [, .D, LSL #1] - // {.D }, /Z, [, .D, LSL #2] - case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - // {.D }, /Z, [, .D] - case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - // {.S }, /Z, [.S{, }] - case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) - // {.D }, /Z, [.D{, }] - case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) - // {.D }, /Z, [{, , LSL #3}] - // {.D }, /Z, [{, , LSL #2}] - case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) - // {.H }, /Z, [{, }] - // {.S }, /Z, [{, }] - // {.D }, /Z, [{, }] - case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - // {.B }, /Z, [{, }] - // {.H }, /Z, [{, }] - // {.S }, /Z, [{, }] - // {.D }, /Z, [{, }] - case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt 
-- SVE contiguous first-fault load (scalar plus - // scalar) - // {.S }, /Z, [{, , LSL #1}] - // {.D }, /Z, [{, , LSL #1}] - // {.S }, /Z, [{, , LSL #2}] - // {.D }, /Z, [{, , LSL #2}] - case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - // {.H }, /Z, [{, , LSL #1}] - // {.S }, /Z, [{, , LSL #1}] - // {.D }, /Z, [{, , LSL #1}] - case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - // {.D }, /Z, [, , LSL #3] - case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - // {.Q }, /Z, [, , LSL #3] - case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - // {.D }, /Z, [, , LSL #2] - case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - // {.D }, /Z, [, , LSL #2 - case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - // {.H }, /Z, [, ] - // {.S }, /Z, [, ] - // {.D }, /Z, [, ] - case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - // {.S }, /Z, [, , LSL #1] - // {.D }, /Z, [, , LSL #1] - case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - // {.B }, /Z, [, ] - // {.H }, /Z, [, ] - // {.S }, /Z, [, ] - // {.D }, /Z, [, ] - case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - // {.H }, /Z, [, , LSL #1] - // {.S }, /Z, [, , LSL #1] - // {.D }, /Z, [, , LSL #1] - case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - // {.B }, /Z, [, ] - // {.H }, /Z, [, ] - // {.S }, /Z, [, ] - // {.D }, /Z, [, ] - case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) - // {.B }, 
/Z, [, ] - // {.H }, /Z, [, ] - // {.S }, /Z, [, ] - // {.D }, /Z, [, ] - case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) - // {.Q, .Q }, /Z, [, , LSL #4] - // {.Q, .Q, .Q }, /Z, [, , LSL #4] - // {.Q, .Q, .Q, .Q }, /Z, [, , LSL #4] - case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // scalar) - // {.B, .B }, /Z, [, ] - // {.H, .H }, /Z, [, , LSL #1] - // {.S, .S }, /Z, [, , LSL #2] - // {.D, .D }, /Z, [, , LSL #3] - // {.B, .B, .B }, /Z, [, ] - // {.H, .H, .H }, /Z, [, , LSL #1] - // {.S, .S, .S }, /Z, [, , LSL #2] - // {.D, .D, .D }, /Z, [, , LSL #3] - // {.B, .B, .B, .B }, /Z, [, ] - // {.H, .H, .H, .H }, /Z, [, , LSL #1] - // {.S, .S, .S, .S }, /Z, [, , LSL #2] - // {.D, .D, .D, .D }, /Z, [, , LSL #3] - case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) - // {.D }, /Z, [, .D, LSL #2] - // {.D }, /Z, [, .D, LSL #3] - case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.D }, /Z, [, .D] - case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.D }, /Z, [, .D] - case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - // {.Q }, /Z, [.D{, }] - case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) - // {.D }, /Z, [.D{, }] - case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus - // scalar) - // {.Q }, , [.D{, }] - case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) - // {.S }, , [.S{, }] - case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal 
store (vector plus - // scalar) - // {.D }, , [.D{, }] - case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus - // scalar) - // {.D }, , [.D{, }] - case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus - // scalar) - // {.B }, , [, ] - // {.H }, , [, , LSL #1] - // {.S }, , [, , LSL #2] - // {.D }, , [, , LSL #3] - case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // scalar) - // {.B, .B }, , [, ] - // {.H, .H }, , [, , LSL #1] - // {.S, .S }, , [, , LSL #2] - // {.D, .D }, , [, , LSL #3] - // {.B, .B, .B }, , [, ] - // {.H, .H, .H }, , [, , LSL #1] - // {.S, .S, .S }, , [, , LSL #2] - // {.D, .D, .D }, , [, , LSL #3] - // {.B, .B, .B, .B }, , [, ] - // {.H, .H, .H, .H }, , [, , LSL #1] - // {.S, .S, .S, .S }, , [, , LSL #2] - // {.D, .D, .D, .D }, , [, , LSL #3] - case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) - // {.Q }, , [, , LSL #2] - // {.D }, , [, , LSL #3] - case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - // {.Q }, , [, , LSL #3] - case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - // {.Q, .Q }, , [, , LSL #4] - // {.Q, .Q, .Q }, , [, , LSL #4] - // {.Q, .Q, .Q, .Q }, , [, , LSL #4] - case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus - // scalar) - // {.D }, , [, .D, LSL #1] - // {.D }, , [, .D, LSL #2] - // {.D }, , [, .D, LSL #3] - case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D] - case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D] - case IF_SVE_JJ_4B_E: // 
...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - // {.D }, , [, .D] - case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), id->idInsOpt(), fmt); // nnnnn - // mmmmm - break; - - // {.}, , [{, #, MUL VL}] - case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - imm = emitGetInsSC(id); - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveImmMulVl(id->idReg3(), imm); - break; +#ifdef DEBUG + /* Make sure we set the instruction descriptor size correctly */ - // {.}, , [{, #, MUL VL}] - case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - imm = emitGetInsSC(id); - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg - emitDispSveImmMulVl(id->idReg3(), imm); - break; + size_t expected = emitSizeOfInsDsc(id); + assert(sz == expected); - // , [{, #, MUL VL}] - case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register - // , [{, #, MUL VL}] - case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register - imm = emitGetInsSC(id); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // TTTT - emitDispSveImmMulVl(id->idReg2(), imm); - break; + if (emitComp->opts.disAsm || emitComp->verbose) + { + emitDispIns(id, 
false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + } - // , [{, #, MUL VL}] - case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register - // , [{, #, MUL VL}] - case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register - imm = emitGetInsSC(id); - emitDispReg(id->idReg1(), EA_SCALABLE, true); // ttttt - emitDispSveImmMulVl(id->idReg2(), imm); - break; + if (emitComp->compDebugBreak) + { + // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for + // emitting instruction a6, (i.e. IN00a6 in jitdump). + if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } - // ., /M, ., ., - case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispImm(emitDecodeRotationImm90_or_270(imm), false); - break; + // Output any delta in GC info. 
+ if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCInfoDelta(); + } +#else + if (emitComp->opts.disAsm) + { + size_t expected = emitSizeOfInsDsc(id); + assert(sz == expected); + emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + } +#endif - // ., /M, ., ., - case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispSveReg(id->idReg4(), id->idInsOpt(), true); - emitDispImm(emitDecodeRotationImm0_to_270(imm), false); - break; - - // ., /Z, ., #0.0 - case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispFloatZero(); - break; + /* All instructions are expected to generate code */ - // ., /M, ., - case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate - // (predicated) - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSmallFloatImm(imm, id->idIns()); - break; - - // ., ., ., # - case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient - case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispImm(emitGetInsSC(id), false); - break; + assert(*dp != dst || id->idIsEmptyAlign()); - // ., /M, . - case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), false); - break; - - // .H, /M, .H, .H - case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - // ., /M, ., . 
- case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing - // multiplicand - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); - break; - - // .B, { .B }, [] - case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - // .B, { .B }, [] - case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - // .H, { .H }, [] - case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - // .H, { .H, .H }, [] - case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - // .H, {.H }, [] - case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveConsecutiveRegList(id->idReg1(), 1, id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); - emitDispElementIndex(imm, false); - break; - - // , , [, .S, ] - // , , [, .S, #1] - // , , [, .S, #2] - // , , [, .S, #3] - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - // , , [, .D, ] - // , , [, .D, #1] - // , , [, .D, #2] - // , , [, .D, #3] - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) - // , , [, .D] - // , , [, .D, LSL #1] - // , , [, .D, LSL #2] - // , , [, .D, LSL #3] - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled 
- // offsets) - // , , [, ] - // , , [, , LSL #1] - // , , [, , LSL #2] - // , , [, , LSL #3] - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - emitDispSvePrfop(id->idSvePrfop(), true); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveModAddr(ins, id->idReg2(), id->idReg3(), id->idInsOpt(), fmt); - break; - - // , , [.S{, #}] - // , , [.D{, #}] - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) - imm = emitGetInsSC(id); - emitDispSvePrfop(id->idSvePrfop(), true); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveImm(id->idReg2(), imm, id->idInsOpt()); - break; + *dp = dst; - // , , [{, #, MUL VL}] - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - imm = emitGetInsSC(id); - emitDispSvePrfop(id->idSvePrfop(), true); - emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveImmMulVl(id->idReg2(), imm); - break; - - // {.S }, /Z, [.S{, #}] - // {.D }, /Z, [.D{, #}] - case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - // {.S }, /Z, [.S{, #}] - // {.D }, /Z, [.D{, #}] - case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - // {.D }, /Z, [.D{, #}] - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) - // {.S }, , [.S{, #}] - // {.D }, , [.D{, #}] - case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - // {.D }, , [.D{, #}] - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and 
broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - // {.D }, /Z, [{, #}] - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - imm = emitGetInsSC(id); - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true); - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); - emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm); - break; + return sz; +} - // , - case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); - break; +/*****************************************************************************/ +/*****************************************************************************/ - // ., - case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispReg(encodingZRtoSP(id->idReg2()), size, false); - break; +/***************************************************************************** + * + * Display a comma + */ +void emitter::emitDispComma() +{ + printf(", "); +} - // .H, .B - case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts - // ., . - case IF_SVE_CH_2A: // ........xx...... 
......nnnnnddddd -- SVE unpack vector elements - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), (insOpts)((unsigned)id->idInsOpt() - 1), false); - break; +/***************************************************************************** + * + * Display the instruction name + */ +void emitter::emitDispInst(instruction ins) +{ + const char* insstr = codeGen->genInsName(ins); + size_t len = strlen(insstr); - // ., . - case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator - // ., . - case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements - // ., . - case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); - break; + /* Display the instruction name */ - // ., ., # - case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) - // ., ., # - case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert - // ., ., # - case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispImm(imm, false); - break; + printf("%s", insstr); + + // + // Add at least one space after the instruction name + // and add spaces until we have reach the normal size of 8 + do + { + printf(" "); + len++; + } while (len < 8); +} + +/***************************************************************************** + * + * Display an immediate value + */ +void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */, bool isAddrOffset /* =false */) +{ + if (isAddrOffset) + { + alwaysHex = true; + } + else if (imm == 0) + { + // Non-offset values of zero are never 
displayed as hex. + alwaysHex = false; + } + + if (strictArmAsm) + { + printf("#"); + } + + // Munge any pointers if we want diff-able disassembly. + // Since some may be emitted as partial words, print as diffable anything that has + // significant bits beyond the lowest 8-bits. + if (emitComp->opts.disDiffable) + { + ssize_t top56bits = (imm >> 8); + if ((top56bits != 0) && (top56bits != -1)) + imm = 0xD1FFAB1E; + } - // ., /Z, #{, } - // ., /M, #{, } - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + if (!alwaysHex && (imm > -1000) && (imm < 1000)) + { + printf("%d", (int)imm); + } + else + { + if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL)) { - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg - emitDispImmOptsLSL(imm, id->idHasShift(), 8); // iiiiiiii, h - break; + printf("-"); + imm = -imm; } - // ., /M, # - case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg - emitDispImm(0, false); - break; - - // ., .[] - // ., - case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - if (imm > 0) + if ((imm & 0xFFFFFFFF00000000LL) != 0) + { + if (isAddrOffset) { - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn - emitDispElementIndex(imm, false); + printf("0x%llX", imm); } else { - assert(imm == 0); - emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false); + printf("0x%llx", imm); } - break; - - // ., .[] - case 
IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), false); - emitDispElementIndex(imm, false); - break; - - // .B, .B, .B, # - case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq - imm = emitGetInsSC(id); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); - emitDispSveReg(id->idReg2(), id->idInsOpt(), true); - emitDispImm(imm, false); - break; - - default: - printf("unexpected format %s", emitIfName(id->idInsFmt())); - assert(!"unexpectedFormat"); - break; - } - - if (id->idIsLclVar()) - { - printf("\t// "); - emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), - id->idDebugOnlyInfo()->idVarRefOffs, asmfm); - if (id->idIsLclVarPair()) + } + else { - printf(", "); - emitLclVarAddr* iiaLclVar2 = emitGetLclVarPairLclVar2(id); - emitDispFrameRef(iiaLclVar2->lvaVarNum(), iiaLclVar2->lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs2, - asmfm); + printf("0x%02X", (unsigned)imm); } } - printf("\n"); + if (addComma) + emitDispComma(); } /***************************************************************************** * - * Display a stack frame reference. 
+ * Display an immediate value as an index operation */ - -void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +void emitter::emitDispElementIndex(const ssize_t imm, const bool addComma) { -#ifdef DEBUG - printf("["); - - if (varx < 0) - printf("TEMP_%02u", -varx); - else - emitComp->gtDispLclVar(+varx, false); + printf("[%d]", imm); - if (disp < 0) - printf("-0x%02x", -disp); - else if (disp > 0) - printf("+0x%02x", +disp); + if (addComma) + { + emitDispComma(); + } +} - printf("]"); +/***************************************************************************** + * + * Display a float zero constant + */ +void emitter::emitDispFloatZero() +{ + if (strictArmAsm) + { + printf("#"); + } + printf("0.0"); +} - if ((varx >= 0) && emitComp->opts.varNames && (((IL_OFFSET)offs) != BAD_IL_OFFSET)) +/***************************************************************************** + * + * Display an encoded float constant value + */ +void emitter::emitDispFloatImm(ssize_t imm8) +{ + assert((0 <= imm8) && (imm8 <= 0x0ff)); + if (strictArmAsm) { - const char* varName = emitComp->compLocalVarName(varx, offs); + printf("#"); + } - if (varName) - { - printf("'%s", varName); + floatImm8 fpImm; + fpImm.immFPIVal = (unsigned)imm8; + double result = emitDecodeFloatImm8(fpImm); - if (disp < 0) - printf("-%d", -disp); - else if (disp > 0) - printf("+%d", +disp); + printf("%.4f", result); +} - printf("'"); - } +/***************************************************************************** + * + * Display an encoded small float constant value + */ +void emitter::emitDispSmallFloatImm(ssize_t imm, instruction ins) +{ + if (strictArmAsm) + { + printf("#"); } -#endif + printf("%.4f", emitDecodeSmallFloatImm(imm, ins)); } -// Generate code for a load or store operation with a potentially complex addressing mode -// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*scale + offset] -// Since Arm64 does not directly support this 
complex of an addressing mode -// we may generates up to three instructions for this for Arm64 -// -void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +/***************************************************************************** + * + * Display an immediate with an optional left-shift. + */ +void emitter::emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount) { - GenTree* addr = indir->Addr(); - - if (addr->isContained()) + if (!strictArmAsm && hasShift) { - assert(addr->OperIs(GT_LCL_ADDR, GT_LEA) || (addr->IsIconHandle(GTF_ICON_TLS_HDL))); + imm <<= shiftAmount; + } + emitDispImm(imm, false); + if (strictArmAsm && hasShift) + { + printf(", LSL #%u", shiftAmount); + } +} - int offset = 0; - DWORD lsl = 0; +/***************************************************************************** + * + * Display an ARM64 condition code for the conditional instructions + */ +void emitter::emitDispCond(insCond cond) +{ + const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid + unsigned imm = (unsigned)cond; + assert((0 <= imm) && (imm < ArrLen(armCond))); + printf(armCond[imm]); +} - if (addr->OperGet() == GT_LEA) - { - offset = addr->AsAddrMode()->Offset(); - if (addr->AsAddrMode()->gtScale > 0) - { - assert(isPow2(addr->AsAddrMode()->gtScale)); - BitScanForward(&lsl, addr->AsAddrMode()->gtScale); - } - } +/***************************************************************************** + * + * Display an ARM64 flags for the conditional instructions + */ +void emitter::emitDispFlags(insCflags flags) +{ + const static char* armFlags[16] = {"0", "v", "c", "cv", "z", "zv", "zc", "zcv", + "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"}; + unsigned imm = (unsigned)flags; + assert((0 <= imm) && (imm < ArrLen(armFlags))); + printf(armFlags[imm]); +} - GenTree* memBase = indir->Base(); 
+/***************************************************************************** + * + * Display an ARM64 'barrier' for the memory barrier instructions + */ +void emitter::emitDispBarrier(insBarrier barrier) +{ + const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh", + "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy"}; + unsigned imm = (unsigned)barrier; + assert((0 <= imm) && (imm < ArrLen(armBarriers))); + printf(armBarriers[imm]); +} - if (indir->HasIndex()) - { - GenTree* index = indir->Index(); +/***************************************************************************** + * + * Prints the encoding for the Shift Type encoding + */ - if (offset != 0) - { - regNumber tmpReg = indir->GetSingleTempReg(); +void emitter::emitDispShiftOpts(insOpts opt) +{ + if (opt == INS_OPTS_LSL) + printf(" LSL "); + else if (opt == INS_OPTS_LSR) + printf(" LSR "); + else if (opt == INS_OPTS_ASR) + printf(" ASR "); + else if (opt == INS_OPTS_ROR) + printf(" ROR "); + else if (opt == INS_OPTS_MSL) + printf(" MSL "); + else + assert(!"Bad value"); +} - emitAttr addType = varTypeIsGC(memBase) ? 
EA_BYREF : EA_PTRSIZE; +/***************************************************************************** + * + * Prints the encoding for the Extend Type encoding + */ - if (emitIns_valid_imm_for_add(offset, EA_8BYTE)) - { - if (lsl > 0) - { - // Generate code to set tmpReg = base + index*scale - emitIns_R_R_R_I(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum(), lsl, - INS_OPTS_LSL); - } - else // no scale - { - // Generate code to set tmpReg = base + index - emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); - } +void emitter::emitDispExtendOpts(insOpts opt) +{ + if (opt == INS_OPTS_UXTB) + printf("UXTB"); + else if (opt == INS_OPTS_UXTH) + printf("UXTH"); + else if (opt == INS_OPTS_UXTW) + printf("UXTW"); + else if (opt == INS_OPTS_UXTX) + printf("UXTX"); + else if (opt == INS_OPTS_SXTB) + printf("SXTB"); + else if (opt == INS_OPTS_SXTH) + printf("SXTH"); + else if (opt == INS_OPTS_SXTW) + printf("SXTW"); + else if (opt == INS_OPTS_SXTX) + printf("SXTX"); + else + assert(!"Bad value"); +} - noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); +//------------------------------------------------------------------------ +// emitDispReg: Display a general-purpose register name or SIMD and floating-point scalar register name +// +void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma) +{ + emitAttr size = EA_SIZE(attr); + printf(emitRegName(reg, size)); - // Then load/store dataReg from/to [tmpReg + offset] - emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); - } - else // large offset - { - // First load/store tmpReg with the large offset constant - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); - // Then add the base register - // rd = rd + base - emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->GetRegNum()); + if (addComma) + emitDispComma(); +} + +//------------------------------------------------------------------------ +// emitDispVectorReg: Display a SIMD vector register name 
with an arrangement suffix +// +void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma) +{ + assert(isVectorRegister(reg)); + printf(emitVectorRegName(reg)); + emitDispArrangement(opt); + + if (addComma) + emitDispComma(); +} + +//------------------------------------------------------------------------ +// emitDispVectorRegIndex: Display a SIMD vector register name with element index +// +void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma) +{ + assert(isVectorRegister(reg)); + printf(emitVectorRegName(reg)); + emitDispElemsize(elemsize); + printf("[%d]", (int)index); - noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); - noway_assert(tmpReg != index->GetRegNum()); + if (addComma) + emitDispComma(); +} - // Then load/store dataReg from/to [tmpReg + index*scale] - emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->GetRegNum(), lsl, INS_OPTS_LSL); - } - } - else // (offset == 0) - { - if (lsl > 0) - { - // Then load/store dataReg from/to [memBase + index*scale] - emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl); - } - else // no scale - { - if (index->OperIs(GT_BFIZ, GT_CAST) && index->isContained()) - { - // Then load/store dataReg from/to [memBase + index*scale with sign/zero extension] - GenTreeCast* cast; - int cns; +//------------------------------------------------------------------------ +// emitDispVectorRegList: Display a SIMD vector register list +// +void emitter::emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma) +{ + assert(isVectorRegister(firstReg)); - if (index->OperIs(GT_BFIZ)) - { - cast = index->gtGetOp1()->AsCast(); - cns = (int)index->gtGetOp2()->AsIntCon()->IconValue(); - } - else - { - cast = index->AsCast(); - cns = 0; - } + regNumber currReg = firstReg; - // For now, this code only supports extensions from i32/u32 - assert(cast->isContained()); + printf("{"); + for (unsigned i = 0; i 
< listSize; i++) + { + const bool notLastRegister = (i != listSize - 1); + emitDispVectorReg(currReg, opt, notLastRegister); + currReg = (currReg == REG_V31) ? REG_V0 : REG_NEXT(currReg); + } + printf("}"); - emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(), - cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW, cns); - } - else - { - // Then load/store dataReg from/to [memBase + index] - emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); - } - } - } - } - else // no Index register - { - if (addr->OperIs(GT_LCL_ADDR)) - { - GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); - unsigned lclNum = varNode->GetLclNum(); - unsigned offset = varNode->GetLclOffs(); - if (emitInsIsStore(ins)) - { - emitIns_S_R(ins, attr, dataReg, lclNum, offset); - } - else - { - emitIns_R_S(ins, attr, dataReg, lclNum, offset); - } - } - else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) - { - // On Arm64, TEB is in r18, so load from the r18 as base. 
- emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue()); - } - else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) - { - // Then load/store dataReg from/to [memBase + offset] - emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); - } - else - { - // We require a tmpReg to hold the offset - regNumber tmpReg = indir->GetSingleTempReg(); + if (addComma) + { + emitDispComma(); + } +} - // First load/store tmpReg with the large offset constant - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); +//------------------------------------------------------------------------ +// emitDispVectorElemList: Display a SIMD vector element list +// +void emitter::emitDispVectorElemList( + regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma) +{ + assert(isVectorRegister(firstReg)); - // Then load/store dataReg from/to [memBase + tmpReg] - emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), tmpReg); - } - } - } - else // addr is not contained, so we evaluate it into a register + regNumber currReg = firstReg; + + printf("{"); + for (unsigned i = 0; i < listSize; i++) { -#ifdef DEBUG - if (addr->OperIs(GT_LCL_ADDR)) + printf(emitVectorRegName(currReg)); + emitDispElemsize(elemsize); + const bool notLastRegister = (i != listSize - 1); + if (notLastRegister) { - // If the local var is a gcref or byref, the local var better be untracked, because we have - // no logic here to track local variable lifetime changes, like we do in the contained case - // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local - // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. - LclVarDsc* varDsc = emitComp->lvaGetDesc(addr->AsLclVarCommon()); - assert(!varDsc->lvTracked); + emitDispComma(); } -#endif // DEBUG + currReg = (currReg == REG_V31) ? 
REG_V0 : REG_NEXT(currReg); + } + printf("}"); + printf("[%d]", index); - // Then load/store dataReg from/to [addrReg] - emitIns_R_R(ins, attr, dataReg, addr->GetRegNum()); + if (addComma) + { + emitDispComma(); } } -// The callee must call genConsumeReg() for any non-contained srcs -// and genProduceReg() for any non-contained dsts. - -regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +//------------------------------------------------------------------------ +// emitDispArrangement: Display a SIMD vector arrangement suffix +// +void emitter::emitDispArrangement(insOpts opt) { - // dst can only be a reg - assert(!dst->isContained()); - - // src can be immed or reg - assert(!src->isContained() || src->isContainedIntOrIImmed()); + const char* str = "???"; - // find immed (if any) - it cannot be a dst - GenTreeIntConCommon* intConst = nullptr; - if (src->isContainedIntOrIImmed()) + switch (opt) { - intConst = src->AsIntConCommon(); + case INS_OPTS_8B: + str = "8b"; + break; + case INS_OPTS_16B: + str = "16b"; + break; + case INS_OPTS_SCALABLE_B: + str = "b"; + break; + case INS_OPTS_4H: + str = "4h"; + break; + case INS_OPTS_8H: + str = "8h"; + break; + case INS_OPTS_SCALABLE_H: + str = "h"; + break; + case INS_OPTS_2S: + str = "2s"; + break; + case INS_OPTS_4S: + str = "4s"; + break; + case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_S_UXTW: + case INS_OPTS_SCALABLE_S_SXTW: + str = "s"; + break; + case INS_OPTS_1D: + str = "1d"; + break; + case INS_OPTS_2D: + str = "2d"; + break; + case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + str = "d"; + break; + case INS_OPTS_SCALABLE_Q: + str = "q"; + break; + + default: + assert(!"Invalid insOpt"); } + printf("."); + printf(str); +} - if (intConst) +//------------------------------------------------------------------------ +// emitDispElemsize: Display a SIMD vector element suffix +// +void emitter::emitDispElemsize(emitAttr elemsize) 
+{ + const char* str = "???"; + + switch (elemsize) { - emitIns_R_I(ins, attr, dst->GetRegNum(), intConst->IconValue()); - return dst->GetRegNum(); + case EA_1BYTE: + str = ".b"; + break; + case EA_2BYTE: + str = ".h"; + break; + case EA_4BYTE: + str = ".s"; + break; + case EA_8BYTE: + str = ".d"; + break; + + default: + assert(!"invalid elemsize"); + break; } - else + + printf(str); +} + +//------------------------------------------------------------------------ +// emitDispShiftedReg: Display a register with an optional shift operation +// +void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr) +{ + emitAttr size = EA_SIZE(attr); + assert((imm & 0x003F) == imm); + assert(((imm & 0x0020) == 0) || (size == EA_8BYTE)); + + printf(emitRegName(reg, size)); + + if (imm > 0) { - emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum()); - return dst->GetRegNum(); + if (strictArmAsm) + { + emitDispComma(); + } + emitDispShiftOpts(opt); + emitDispImm(imm, false); } } -// The callee must call genConsumeReg() for any non-contained srcs -// and genProduceReg() for any non-contained dsts. - -regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +/***************************************************************************** + * + * Display a register with an optional extend and scale operations + */ +void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm) { - // dst can only be a reg - assert(!dst->isContained()); + assert((imm >= 0) && (imm <= 4)); + assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL)); - // find immed (if any) - it cannot be a dst - // Only one src can be an int. - GenTreeIntConCommon* intConst = nullptr; - GenTree* nonIntReg = nullptr; + // size is based on the extend option, not the instr size. + // Assume INS_OPTS_NONE and INS_OPTS_LSL are 64bit as they usually are. 
+ emitAttr size = (insOptsNone(opt) || insOptsLSL(opt) || insOpts64BitExtend(opt)) ? EA_8BYTE : EA_4BYTE; - if (varTypeIsFloating(dst)) - { - // src1 can only be a reg - assert(!src1->isContained()); - // src2 can only be a reg - assert(!src2->isContained()); - } - else // not floating point + if (strictArmAsm) { - // src2 can be immed or reg - assert(!src2->isContained() || src2->isContainedIntOrIImmed()); - - // Check src2 first as we can always allow it to be a contained immediate - if (src2->isContainedIntOrIImmed()) + if (insOptsNone(opt) || (insOptsLSL(opt) && imm == 0)) { - intConst = src2->AsIntConCommon(); - nonIntReg = src1; + emitDispReg(reg, size, false); } - // Only for commutative operations do we check src1 and allow it to be a contained immediate - else if (dst->OperIsCommutative()) + else { - // src1 can be immed or reg - assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + emitDispReg(reg, size, true); - // Check src1 and allow it to be a contained immediate - if (src1->isContainedIntOrIImmed()) + if (insOptsLSL(opt)) + printf("LSL"); + else + emitDispExtendOpts(opt); + + if (imm > 0) { - assert(!src2->isContainedIntOrIImmed()); - intConst = src1->AsIntConCommon(); - nonIntReg = src2; + printf(" "); + emitDispImm(imm, false); } } - else - { - // src1 can only be a reg - assert(!src1->isContained()); - } } - - bool isMulOverflow = false; - if (dst->gtOverflowEx()) + else // !strictArmAsm { - if ((ins == INS_add) || (ins == INS_adds)) - { - ins = INS_adds; - } - else if ((ins == INS_sub) || (ins == INS_subs)) - { - ins = INS_subs; - } - else if (ins == INS_mul) + if (insOptsNone(opt)) { - isMulOverflow = true; - assert(intConst == nullptr); // overflow format doesn't support an int constant operand + emitDispReg(reg, size, false); } else { - assert(!"Invalid ins for overflow check"); - } - } - if (intConst != nullptr) - { - emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), intConst->IconValue()); - } - else - { - if 
(isMulOverflow) - { - regNumber extraReg = dst->GetSingleTempReg(); - assert(extraReg != dst->GetRegNum()); - - if ((dst->gtFlags & GTF_UNSIGNED) != 0) - { - if (attr == EA_4BYTE) - { - // Compute 8 byte results from 4 byte by 4 byte multiplication. - emitIns_R_R_R(INS_umull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - - // Get the high result by shifting dst. - emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32); - } - else - { - assert(attr == EA_8BYTE); - // Compute the high result. - emitIns_R_R_R(INS_umulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum()); - - // Now multiply without skewing the high result. - emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - } - - // zero-sign bit comparison to detect overflow. - emitIns_R_I(INS_cmp, attr, extraReg, 0); - } - else + if (opt != INS_OPTS_LSL) { - int bitShift = 0; - if (attr == EA_4BYTE) - { - // Compute 8 byte results from 4 byte by 4 byte multiplication. - emitIns_R_R_R(INS_smull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - - // Get the high result by shifting dst. - emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32); - - bitShift = 31; - } - else - { - assert(attr == EA_8BYTE); - // Save the high result in a temporary register. - emitIns_R_R_R(INS_smulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum()); - - // Now multiply without skewing the high result. - emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - - bitShift = 63; - } - - // Sign bit comparison to detect overflow. - emitIns_R_R_I(INS_cmp, attr, extraReg, dst->GetRegNum(), bitShift, INS_OPTS_ASR); + emitDispExtendOpts(opt); + printf("("); + emitDispReg(reg, size, false); + printf(")"); } } - else + if (imm > 0) { - // We can just multiply. 
- emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + printf("*"); + emitDispImm(ssize_t{1} << imm, false); } } - - if (dst->gtOverflowEx()) - { - assert(!varTypeIsFloating(dst)); - codeGen->genCheckOverflow(dst); - } - - return dst->GetRegNum(); } -#if defined(DEBUG) || defined(LATE_DISASM) - -void emitter::getMemoryOperation(instrDesc* id, unsigned* pMemAccessKind, bool* pIsLocalAccess) +/***************************************************************************** + * + * Display an addressing operand [reg + imm] + */ +void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm) { - unsigned memAccessKind = PERFSCORE_MEMORY_NONE; - bool isLocalAccess = false; - instruction ins = id->idIns(); + reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register - if (emitInsIsLoadOrStore(ins)) + if (strictArmAsm) { - if (emitInsIsLoad(ins)) + printf("["); + + emitDispReg(reg, EA_8BYTE, false); + + if (!insOptsPostIndex(opt) && (imm != 0)) { - if (emitInsIsStore(ins)) - { - memAccessKind = PERFSCORE_MEMORY_READ_WRITE; - } - else - { - memAccessKind = PERFSCORE_MEMORY_READ; - } + emitDispComma(); + emitDispImm(imm, false, true, true); } - else + printf("]"); + + if (insOptsPreIndex(opt)) { - assert(emitInsIsStore(ins)); - memAccessKind = PERFSCORE_MEMORY_WRITE; + printf("!"); } - - insFormat insFmt = id->idInsFmt(); - - switch (insFmt) + else if (insOptsPostIndex(opt)) { - case IF_LS_1A: - isLocalAccess = true; - break; + emitDispComma(); + emitDispImm(imm, false, true, true); + } + } + else // !strictArmAsm + { + printf("["); - case IF_LS_2A: - case IF_LS_2B: - case IF_LS_2C: - case IF_LS_2D: - case IF_LS_2E: - case IF_LS_2F: - case IF_LS_2G: - case IF_LS_3A: - case IF_LS_3F: - case IF_LS_3G: - if (isStackRegister(id->idReg2())) - { - isLocalAccess = true; - } - break; + const char* operStr = "++"; + if (imm < 0) + { + operStr = "--"; + imm = -imm; + } - case IF_LS_3B: - case IF_LS_3C: - case IF_LS_3D: - case IF_LS_3E: - if 
(isStackRegister(id->idReg3())) - { - isLocalAccess = true; - } - break; - case IF_LARGELDC: - isLocalAccess = false; - break; + if (insOptsPreIndex(opt)) + { + printf(operStr); + } - default: - assert(!"Logic Error"); - memAccessKind = PERFSCORE_MEMORY_NONE; - break; + emitDispReg(reg, EA_8BYTE, false); + + if (insOptsPostIndex(opt)) + { + printf(operStr); } - } - *pMemAccessKind = memAccessKind; - *pIsLocalAccess = isLocalAccess; + if (insOptsIndexed(opt)) + { + emitDispComma(); + } + else + { + printf("%c", operStr[1]); + } + emitDispImm(imm, false, true, true); + printf("]"); + } } -//---------------------------------------------------------------------------------------- -// getInsExecutionCharacteristics: -// Returns the current instruction execution characteristics -// -// Arguments: -// id - The current instruction descriptor to be evaluated -// -// Return Value: -// A struct containing the current instruction execution characteristics -// -// Notes: -// The instruction latencies and throughput values returned by this function -// are from -// -// The Arm Cortex-A55 Software Optimization Guide: -// https://static.docs.arm.com/epm128372/20/arm_cortex_a55_software_optimization_guide_v2.pdf -// -emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +/***************************************************************************** + * + * Display an addressing operand [reg + extended reg] + */ +void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size) { - insExecutionCharacteristics result; - instruction ins = id->idIns(); - insFormat insFmt = id->idInsFmt(); + reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register - unsigned memAccessKind; - bool isLocalAccess; - getMemoryOperation(id, &memAccessKind, &isLocalAccess); + unsigned scale = 0; + if (isScaled) + { + scale = NaturalScale_helper(size); + } - result.insThroughput = PERFSCORE_THROUGHPUT_ILLEGAL; - 
result.insLatency = PERFSCORE_LATENCY_ILLEGAL; + printf("["); - // Initialize insLatency based upon the instruction's memAccessKind and local access values - // - if (memAccessKind == PERFSCORE_MEMORY_READ) + if (strictArmAsm) { - result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_STACK : PERFSCORE_LATENCY_RD_GENERAL; + emitDispReg(reg1, EA_8BYTE, true); + emitDispExtendReg(reg2, opt, scale); } - else if (memAccessKind == PERFSCORE_MEMORY_WRITE) + else // !strictArmAsm { - result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_WR_STACK : PERFSCORE_LATENCY_WR_GENERAL; + emitDispReg(reg1, EA_8BYTE, false); + printf("+"); + emitDispExtendReg(reg2, opt, scale); } - else if (memAccessKind == PERFSCORE_MEMORY_READ_WRITE) + + printf("]"); +} + +/***************************************************************************** + * + * Display (optionally) the instruction encoding in hex + */ + +void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) +{ + if (!emitComp->opts.disCodeBytes) { - result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_WR_STACK : PERFSCORE_LATENCY_RD_WR_GENERAL; + return; } - switch (insFmt) + // We do not display the instruction hex if we want diff-able disassembly + if (!emitComp->opts.disDiffable) { - // - // Branch Instructions - // - - case IF_BI_0A: // b, bl_local - case IF_BI_0C: // bl, b_tail - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // but is Dual Issue - result.insLatency = PERFSCORE_LATENCY_1C; - break; - - case IF_BI_0B: // beq, bne, bge, blt, bgt, ble, ... 
- case IF_BI_1A: // cbz, cbnz - case IF_BI_1B: // tbz, tbnz - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + if (sz == 4) + { + printf(" %08X ", (*((code_t*)code))); + } + else + { + printf(" "); + } + } +} - case IF_LARGEJMP: // bcc + b - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; +/***************************************************************************** + * + * Handles printing of LARGEJMP pseudo-instruction. + */ - case IF_BR_1B: // blr, br_tail - if (ins == INS_blr) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; - } - // otherwise we should have a br_tail instruction - assert(ins == INS_br_tail); - FALLTHROUGH; - case IF_BR_1A: // ret, br - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; +void emitter::emitDispLargeJmp( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ + // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very + // difficult-to-find bug. - // - // Arithmetic and logical instructions - // + inlineInstrDesc idJmp; + instrDescJmp* pidJmp = idJmp.id(); - // ALU, basic - case IF_DR_3A: // add, adds, adc, adcs, and, ands, bic, bics, - // eon, eor, orn, orr, sub, subs, sbc, sbcs - // asr, asrv, lsl, lslv, lsr, lsrv, ror, rorv - // sdiv, udiv, mul, smull, smulh, umull, umulh, mneg - case IF_DR_2A: // cmp, cmn, tst + const instruction ins = id->idIns(); + instruction reverseIns; + insFormat reverseFmt; - switch (ins) - { - case INS_mul: - case INS_smull: - case INS_umull: - case INS_mneg: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + // Reverse the conditional instruction. 
+ switch (ins) + { + case INS_cbz: + reverseIns = INS_cbnz; + reverseFmt = IF_BI_1A; + break; + case INS_cbnz: + reverseIns = INS_cbz; + reverseFmt = IF_BI_1A; + break; + case INS_tbz: + reverseIns = INS_tbnz; + reverseFmt = IF_BI_1B; + break; + case INS_tbnz: + reverseIns = INS_tbz; + reverseFmt = IF_BI_1B; + break; + default: + reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); + reverseFmt = IF_BI_0B; + } - case INS_smulh: - case INS_umulh: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; + pidJmp->idIns(reverseIns); + pidJmp->idInsFmt(reverseFmt); + pidJmp->idOpSize(id->idOpSize()); + pidJmp->idAddr()->iiaSetInstrCount(1); + pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field. - case INS_sdiv: - case INS_udiv: - if (id->idOpSize() == EA_4BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_12C; - break; - } - else - { - assert(id->idOpSize() == EA_8BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_20C; - break; - } + const size_t bcondSizeOrZero = (pCode == NULL) ? 0 : 4; // Branch is 4 bytes. 
+ emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, pCode, bcondSizeOrZero, + NULL /* force display of pc-relative branch */); - case INS_add: - case INS_adds: - case INS_adc: - case INS_adcs: - case INS_and: - case INS_ands: - case INS_bic: - case INS_bics: - case INS_eon: - case INS_eor: - case INS_orn: - case INS_orr: - case INS_sub: - case INS_subs: - case INS_sbc: - case INS_sbcs: - case INS_asr: - case INS_lsl: - case INS_lsr: - case INS_ror: - case INS_cmp: - case INS_cmn: - case INS_tst: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + pCode += bcondSizeOrZero; + offset += 4; - case INS_asrv: - case INS_lslv: - case INS_lsrv: - case INS_rorv: - // variable shift by register - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + // Next, display the unconditional branch. - case INS_crc32b: - case INS_crc32h: - case INS_crc32cb: - case INS_crc32ch: - case INS_crc32x: - case INS_crc32cx: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + // Reset the local instrDesc. + memset(pidJmp, 0, sizeof(instrDescJmp)); - case INS_crc32w: - case INS_crc32cw: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + pidJmp->idIns(INS_b); + pidJmp->idInsFmt(IF_LARGEJMP); - case INS_smaddl: - case INS_smsubl: - case INS_smnegl: - case INS_umaddl: - case INS_umsubl: - case INS_umnegl: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + if (id->idIsBound()) + { + pidJmp->idSetIsBound(); + pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel; + } + else + { + pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel; + } - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // Share the idDebugOnlyInfo() field. 
- // ALU, basic immediate - case IF_DI_1A: // cmp, cmn - case IF_DI_1C: // tst - case IF_DI_1D: // mov reg, imm(N,r,s) - case IF_DI_1E: // adr, adrp - case IF_DI_1F: // ccmp, ccmn - case IF_DI_2A: // add, adds, suv, subs - case IF_DI_2C: // and, ands, eor, orr + const size_t brSizeOrZero = (pCode == NULL) ? 0 : 4; // Unconditional branch is 4 bytes. + emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, pCode, brSizeOrZero, ig); +} - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; +/***************************************************************************** + * + * Wrapper for emitter::emitDispInsHelp() that handles special large jump + * pseudo-instruction. + */ - case IF_DR_2D: // cinc, cinv, cneg - case IF_DR_2E: // mov, neg, mvn, negs - case IF_DI_1B: // mov, movk, movn, movz +void emitter::emitDispIns( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ + // Special case: IF_LARGEJMP - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + if ((id->idInsFmt() == IF_LARGEJMP) && id->idIsBound()) + { + // This is a pseudo-instruction format representing a large conditional branch. See the comment + // in emitter::emitOutputLJ() for the full description. + // + // For this pseudo-instruction, we will actually generate: + // + // b L_not // 4 bytes. Note that we reverse the condition. + // b L_target // 4 bytes. + // L_not: + // + // These instructions don't exist in the actual instruction stream, so we need to fake them + // up to display them. + emitDispLargeJmp(id, isNew, doffs, asmfm, offset, pCode, sz, ig); + } + else + { + emitDispInsHelp(id, isNew, doffs, asmfm, offset, pCode, sz, ig); + } +} - case IF_LARGEADR: // adrp + add - case IF_LARGELDC: // adrp + ldr +//-------------------------------------------------------------------- +// emitDispInsHelp: Dump the given instruction to jitstdout. 
+// +// Arguments: +// id - The instruction +// isNew - Whether the instruction is newly generated (before encoding). +// doffs - If true, always display the passed-in offset. +// asmfm - Whether the instruction should be displayed in assembly format. +// If false some additional information may be printed for the instruction. +// offset - The offset of the instruction. Only displayed if doffs is true or if +// !isNew && !asmfm. +// code - Pointer to the actual code, used for displaying the address and encoded bytes +// if turned on. +// sz - The size of the instruction, used to display the encoded bytes. +// ig - The instruction group containing the instruction. +// +void emitter::emitDispInsHelp( + instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig) +{ +#ifdef DEBUG + if (EMITVERBOSE) + { + unsigned idNum = + id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + printf("IN%04x: ", idNum); + } +#endif - // ALU, shift by immediate - case IF_DR_3B: // add, adds, and, ands, bic, bics, - // eon, eor, orn, orr, sub, subs - case IF_DR_2B: // cmp, cmn, tst - case IF_DR_2F: // neg, negs, mvn - case IF_DI_2B: // ror - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + if (pCode == NULL) + { + sz = 0; + } - // ALU, extend, scale - case IF_DR_3C: // add, adc, and, bic, eon, eor, orn, orr, sub, sbc - case IF_DR_2C: // cmp - case IF_DV_2U: // sha1h - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - // ALU, Conditional select - case IF_DR_1D: // cset, csetm - case IF_DR_3D: // csel, csinc, csinv, csneg + if (!isNew && !asmfm && sz) + { + doffs = true; + } - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + /* 
Display the instruction address */ - // ALU, Conditional compare - case IF_DR_2I: // ccmp , ccmn + emitDispInsAddr(pCode); - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + /* Display the instruction offset */ - // Multiply accumulate - case IF_DR_4A: // madd, msub, smaddl, smsubl, umaddl, umsubl - if (id->idOpSize() == EA_4BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - } - else - { - assert(id->idOpSize() == EA_8BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_5C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - } + emitDispInsOffs(offset, doffs); - // Miscellaneous Data Preocessing instructions - case IF_DR_3E: // extr - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + BYTE* pCodeRW = nullptr; + if (pCode != nullptr) + { + /* Display the instruction hex code */ + assert(((pCode >= emitCodeBlock) && (pCode < emitCodeBlock + emitTotalHotCodeSize)) || + ((pCode >= emitColdCodeBlock) && (pCode < emitColdCodeBlock + emitTotalColdCodeSize))); - case IF_DR_2H: // sxtb, sxth, sxtw, uxtb, uxth, sha1h - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + pCodeRW = pCode + writeableOffset; + } - case IF_DI_2D: // lsl, lsr, asr, sbfm, bfm, ubfm, sbfiz, bfi, ubfiz, sbfx, bfxil, ubfx - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + emitDispInsHex(id, pCodeRW, sz); - case IF_DR_2G: // mov sp, cls, clz, rbit, rev16, rev32, rev - if (ins == INS_rbit) - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; - } + printf(" "); - // - // Load/Store Instructions - // + /* Get the instruction and format */ - case IF_LS_1A: // ldr, 
ldrsw (literal, pc relative immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); - case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate) - // ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh, - // ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh + emitDispInst(ins); - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - // ToDo: store release have 2/4 cycle latency - break; + /* If this instruction has just been added, check its size */ - case IF_LS_2B: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (scaled immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id); - case IF_LS_2C: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh - // ldur, ldurb, ldurh, ldursb, ldursh, ldursw, stur, sturb, sturh - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + /* Figure out the operand size */ + emitAttr size = id->idOpSize(); + emitAttr attr = size; + if (id->idGCref() == GCT_GCREF) + attr = EA_GCREF; + else if (id->idGCref() == GCT_BYREF) + attr = EA_BYREF; - case IF_LS_3A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb strh (register extend, scale 2,4,8) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + switch (fmt) + { + ssize_t imm; + int doffs; + bitMaskImm bmi; + halfwordImm hwi; + condFlagsImm cfi; + unsigned scale; + unsigned immShift; + bool hasShift; + const char* methodName; + emitAttr elemsize; + emitAttr datasize; + emitAttr srcsize; + emitAttr dstsize; + ssize_t index; + ssize_t index2; + unsigned registerListSize; + const char* targetName; - case IF_LS_3B: // ldp, ldpsw, ldnp, stp, stnp (load/store pair zero offset) - case IF_LS_3C: // load/store pair with offset pre/post inc - if (memAccessKind == PERFSCORE_MEMORY_READ) + case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 + case IF_BI_0B: 
// BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 + case IF_LARGEJMP: + { + if (fmt == IF_LARGEJMP) { - // ldp, ldpsw, ldnp - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - if (emitIGisInEpilog(emitCurIG) && (ins == INS_ldp)) + printf("(LARGEJMP)"); + } + if (id->idAddr()->iiaHasInstrCount()) + { + int instrCount = id->idAddr()->iiaGetInstrCount(); + + if (ig == nullptr) { - // Reduce latency for ldp instructions in the epilog - // - result.insLatency = PERFSCORE_LATENCY_2C; + printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount); } - else if (id->idOpSize() == EA_8BYTE) // X-form + else { - // the X-reg variant has an extra cycle of latency - // and two cycle throughput - result.insLatency += 1.0; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + unsigned insNum = emitFindInsNum(ig, id); + UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1); + UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount); + ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs)); + printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount); } } - else // store instructions - { - // stp, stnp - assert(memAccessKind == PERFSCORE_MEMORY_WRITE); - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - } - break; - - case IF_LS_3D: // stxr, stxrb, stxrh, stlxr, stlxrb, srlxrh - // Store exclusive register, returning status - assert(emitInsIsStore(ins)); - // @ToDo - find out the actual latency - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = max(PERFSCORE_LATENCY_4C, result.insLatency); - break; - - case IF_LS_3E: // ARMv8.1 LSE Atomics - if (memAccessKind == PERFSCORE_MEMORY_WRITE) + else if (id->idIsBound()) { - // staddb, staddlb, staddh, staddlh, stadd. 
staddl - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + emitPrintLabel(id->idAddr()->iiaIGlabel); } else { - assert(memAccessKind == PERFSCORE_MEMORY_READ_WRITE); - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = max(PERFSCORE_LATENCY_3C, result.insLatency); + printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); } - break; - - case IF_LS_2D: - case IF_LS_2E: - case IF_LS_3F: - // Load/Store multiple structures - // Load single structure and replicate - switch (ins) - { - case INS_ld1: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; - - case INS_ld1_2regs: - case INS_ld2: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; - } - break; - - case INS_ld1_3regs: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_5C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_6C; - result.insLatency = PERFSCORE_LATENCY_8C; - } - break; - - case INS_ld1_4regs: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_8C; - result.insLatency = PERFSCORE_LATENCY_10C; - } - break; - - case INS_ld3: - if (id->idOpSize() == EA_8BYTE) 
- { - // D-form - if (optGetElemsize(id->idInsOpt()) == EA_4BYTE) - { - // S - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_5C; - } - else - { - // B/H - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; - } - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) || - (optGetElemsize(id->idInsOpt()) == EA_8BYTE)) - { - // S/D - result.insThroughput = PERFSCORE_THROUGHPUT_6C; - result.insLatency = PERFSCORE_LATENCY_8C; - } - else - { - // B/H - result.insThroughput = PERFSCORE_THROUGHPUT_7C; - result.insLatency = PERFSCORE_LATENCY_9C; - } - } - break; - - case INS_ld4: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - if (optGetElemsize(id->idInsOpt()) == EA_4BYTE) - { - // S - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; - } - else - { - // B/H - result.insThroughput = PERFSCORE_THROUGHPUT_5C; - result.insLatency = PERFSCORE_LATENCY_7C; - } - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) || - (optGetElemsize(id->idInsOpt()) == EA_8BYTE)) - { - // S/D - result.insThroughput = PERFSCORE_THROUGHPUT_8C; - result.insLatency = PERFSCORE_LATENCY_10C; - } - else - { - // B/H - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_11C; - } - } - break; - - case INS_ld1r: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - - case INS_ld2r: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - break; - - case INS_ld3r: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - 
result.insLatency = PERFSCORE_LATENCY_5C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; + } + break; - case INS_ld4r: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; + case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 + methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); + printf("%s", methodName); + break; - case INS_st1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00 + case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00 + { + assert(insOptsNone(id->idInsOpt())); + emitDispReg(id->idReg1(), size, true); - case INS_st1_2regs: - case INS_st2: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - break; + if (fmt == IF_BI_1B) + { + emitDispImm(emitGetInsSC(id), true); + } - case INS_st1_3regs: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - break; + if (id->idAddr()->iiaHasInstrCount()) + { + int instrCount = id->idAddr()->iiaGetInstrCount(); - case INS_st1_4regs: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput 
= PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - else - { - // Q-form - assert(id->idOpSize() == EA_16BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; + if (ig == nullptr) + { + printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount); + } + else + { + unsigned insNum = emitFindInsNum(ig, id); + UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1); + UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount); + ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs)); + printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", (int)relOffs, (int)instrCount); + } + } + else if (id->idIsBound()) + { + emitPrintLabel(id->idAddr()->iiaIGlabel); + } + else + { + printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); + } + } + break; - case INS_st3: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn + assert(insOptsNone(id->idInsOpt())); + emitDispReg(id->idReg1(), size, false); + break; - case INS_st4: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - else - { - assert(id->idOpSize() == EA_16BYTE); - if (optGetElemsize(id->idInsOpt()) == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_5C; - result.insLatency = PERFSCORE_LATENCY_5C; - } - } - break; + case IF_BR_1B: // BR_1B ................ ......nnnnn..... 
Rn + // The size of a branch target is always EA_PTRSIZE + assert(insOptsNone(id->idInsOpt())); + emitDispReg(id->idReg3(), EA_PTRSIZE, false); + break; - default: - unreached(); + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 + case IF_LARGELDC: + case IF_LARGEADR: + assert(insOptsNone(id->idInsOpt())); + emitDispReg(id->idReg1(), size, true); + imm = emitGetInsSC(id); + targetName = nullptr; + + /* Is this actually a reference to a data section? */ + if (fmt == IF_LARGEADR) + { + printf("(LARGEADR)"); + } + else if (fmt == IF_LARGELDC) + { + printf("(LARGELDC)"); } - break; - case IF_LS_2F: - case IF_LS_2G: - case IF_LS_3G: - // Load/Store single structure - switch (ins) + printf("["); + if (id->idAddr()->iiaIsJitDataOffset()) { - case INS_ld1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd); + /* Display a data section reference */ - case INS_ld2: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - break; + if (doffs & 1) + printf("@CNS%02u", doffs - 1); + else + printf("@RWD%02u", doffs); - case INS_ld3: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_5C; - } - else + if (imm != 0) + printf("%+Id", imm); + } + else + { + assert(imm == 0); + if (id->idIsReloc()) + { + printf("HIGH RELOC "); + emitDispImm((ssize_t)id->idAddr()->iiaAddr, false); + size_t targetHandle = id->idDebugOnlyInfo()->idMemCookie; + +#ifdef DEBUG + if (targetHandle == THT_InitializeArrayIntrinsics) { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = 
PERFSCORE_LATENCY_4C; + targetName = "InitializeArrayIntrinsics"; } - break; - - case INS_ld4: - if (id->idOpSize() == EA_8BYTE) + else if (targetHandle == THT_GSCookieCheck) { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - result.insLatency = PERFSCORE_LATENCY_6C; + targetName = "GlobalSecurityCookieCheck"; } - else + else if (targetHandle == THT_SetGSCookie) { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; + targetName = "SetGlobalSecurityCookie"; } - break; +#endif + } + else if (id->idIsBound()) + { + emitPrintLabel(id->idAddr()->iiaIGlabel); + } + else + { + printf("L_M%03u_" FMT_BB, emitComp->compMethodID, id->idAddr()->iiaBBlabel->bbNum); + } + } + printf("]"); + if (targetName != nullptr) + { + printf(" // [%s]", targetName); + } + else + { + emitDispCommentForHandle(id->idDebugOnlyInfo()->idMemCookie, 0, id->idDebugOnlyInfo()->idFlags); + } + break; + + case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn + assert(insOptsNone(id->idInsOpt())); + assert((emitGetInsSC(id) == 0) || id->idIsTlsGD()); + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0); + break; - case INS_st1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) + assert(insOptsNone(id->idInsOpt())); + imm = emitGetInsSC(id); + scale = NaturalScale_helper(emitInsLoadStoreSize(id)); + imm <<= scale; // The immediate is scaled by the size of the ld/st + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm); + break; - case INS_st2: - if (id->idOpSize() == EA_8BYTE) - { - // D - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - else - { - // B/H/S - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = 
PERFSCORE_LATENCY_1C; - } - break; + case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc + assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); + imm = emitGetInsSC(id); + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm); + break; - case INS_st3: - case INS_st4: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_LS_2D: // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn + case IF_LS_2E: // LS_2E .Q.............. ....ssnnnnnttttt Vt Rn + registerListSize = insGetRegisterListSize(id->idIns()); + emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true); - default: - unreached(); + if (fmt == IF_LS_2D) + { + // Load/Store multiple structures base register + // Load single structure and replicate base register + emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0); + } + else + { + // Load/Store multiple structures post-indexed by an immediate + // Load single structure and replicate post-indexed by an immediate + emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, id->idSmallCns()); } break; - case IF_SN_0A: // nop, yield, align + case IF_LS_2F: // LS_2F .Q.............. xx.Sssnnnnnttttt Vt[] Rn + case IF_LS_2G: // LS_2G .Q.............. xx.Sssnnnnnttttt Vt[] Rn + registerListSize = insGetRegisterListSize(id->idIns()); + elemsize = id->idOpSize(); + emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true); - if (id->idIns() == INS_align) + if (fmt == IF_LS_2F) { - if ((id->idInsOpt() == INS_OPTS_NONE) || ((instrDescAlign*)id)->isPlacedAfterJmp) - { - // Either we're not going to generate 'align' instruction, or the 'align' - // instruction is placed immediately after unconditional jmp. - // In both cases, don't count for PerfScore. 
- - result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; - result.insLatency = PERFSCORE_LATENCY_ZERO; - break; - } + // Load/Store single structure base register + emitDispAddrRI(id->idReg2(), INS_OPTS_NONE, 0); } - else if (ins == INS_yield) + else { - // @ToDo - find out the actual latency, match x86/x64 for now - result.insThroughput = PERFSCORE_THROUGHPUT_140C; - result.insLatency = PERFSCORE_LATENCY_140C; + // Load/Store single structure post-indexed by an immediate + emitDispAddrRI(id->idReg2(), INS_OPTS_POST_INDEX, (registerListSize * elemsize)); } - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_ZERO; break; - case IF_SI_0B: // dmb, dsb, isb - // @ToDo - find out the actual latency - result.insThroughput = PERFSCORE_THROUGHPUT_10C; - result.insLatency = PERFSCORE_LATENCY_10C; + case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {} + assert(insOptsLSExtend(id->idInsOpt())); + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + if (id->idIsLclVar()) + { + emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size); + } + else + { + emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size); + } break; - case IF_DV_2J: // fcvt Vd Vn - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; + case IF_LS_3B: // LS_3B X............... 
.aaaaannnnnddddd Rt Ra Rn + assert(insOptsNone(id->idInsOpt())); + assert(emitGetInsSC(id) == 0); + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); break; - case IF_DV_2K: // fcmp Vd Vn - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; + case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnddddd Rt Ra Rn imm(im7,sh) + assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt())); + imm = emitGetInsSC(id); + scale = NaturalScale_helper(emitInsLoadStoreSize(id)); + imm <<= scale; + emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true); + emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm); break; - case IF_DV_1A: // fmov - immediate (scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; + case IF_LS_3D: // LS_3D .X.......X.mmmmm ......nnnnnttttt Wm Rt Rn + assert(insOptsNone(id->idInsOpt())); + emitDispReg(id->idReg1(), EA_4BYTE, true); + emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true); + emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); break; - case IF_DV_1B: // fmov, orr, bic, movi, mvni (immediate vector) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; + case IF_LS_3E: // LS_3E .X.........mmmmm ......nnnnnttttt Rm Rt Rn ARMv8.1 LSE Atomics + assert(insOptsNone(id->idInsOpt())); + assert((EA_SIZE(size) == 4) || (EA_SIZE(size) == 8)); + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0); break; - case IF_DV_1C: // fcmp vn, #0.0 - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_LS_3F: // LS_3F .Q.........mmmmm ....ssnnnnnttttt Vt Rn Rm + case IF_LS_3G: // LS_3G .Q.........mmmmm 
...Sssnnnnnttttt Vt[] Rn Rm + registerListSize = insGetRegisterListSize(id->idIns()); - case IF_DV_2A: // fabs, fneg, fsqrt, fcvtXX, frintX, scvtf, ucvtf, fcmXX (vector) - switch (ins) + if (fmt == IF_LS_3F) { - case INS_fabs: - case INS_fneg: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = (id->idOpSize() == EA_8BYTE) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C / 2; - break; + // Load/Store multiple structures post-indexed by a register + // Load single structure and replicate post-indexed by a register + emitDispVectorRegList(id->idReg1(), registerListSize, id->idInsOpt(), true); + } + else + { + // Load/Store single structure post-indexed by a register + elemsize = id->idOpSize(); + emitDispVectorElemList(id->idReg1(), registerListSize, elemsize, id->idSmallCns(), true); + } - case INS_fsqrt: - if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S)) - { - // S-form - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_11C; - } - else - { - // D-form - assert(id->idInsOpt() == INS_OPTS_2D); - result.insThroughput = PERFSCORE_THROUGHPUT_6C; - result.insLatency = PERFSCORE_LATENCY_18C; - } - break; + printf("["); + emitDispReg(encodingZRtoSP(id->idReg2()), EA_8BYTE, false); + printf("], "); + emitDispReg(id->idReg3(), EA_8BYTE, false); + break; - case INS_fcvtas: - case INS_fcvtau: - case INS_fcvtms: - case INS_fcvtmu: - case INS_fcvtns: - case INS_fcvtnu: - case INS_fcvtps: - case INS_fcvtpu: - case INS_fcvtzs: - case INS_fcvtzu: - case INS_frinta: - case INS_frinti: - case INS_frintm: - case INS_frintn: - case INS_frintp: - case INS_frintx: - case INS_frintz: - case INS_scvtf: - case INS_ucvtf: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... 
Rn imm(i12,sh) + emitDispReg(id->idReg1(), size, true); + emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); + emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); + break; - case INS_fcmeq: - case INS_fcmge: - case INS_fcmgt: - case INS_fcmle: - case INS_fcmlt: - case INS_frecpe: - case INS_frsqrte: - case INS_urecpe: - case INS_ursqrte: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw) + emitDispReg(id->idReg1(), size, true); + hwi.immHWVal = (unsigned)emitGetInsSC(id); + if (ins == INS_mov) + { + emitDispImm(emitDecodeHalfwordImm(hwi, size), false); + } + else // movz, movn, movk + { + emitDispImm(hwi.immVal, false); + if (hwi.immHW != 0) + { + emitDispShiftOpts(INS_OPTS_LSL); + emitDispImm(hwi.immHW * 16, false); + } + } + emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); + break; - case INS_fcvtl: - case INS_fcvtl2: - case INS_fcvtn: - case INS_fcvtn2: - case INS_fcvtxn: - case INS_fcvtxn2: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... 
Rn imm(N,r,s) + emitDispReg(id->idReg1(), size, true); + bmi.immNRS = (unsigned)emitGetInsSC(id); + emitDispImm(emitDecodeBitMaskImm(bmi, size), false); + emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s) + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + bmi.immNRS = (unsigned)emitGetInsSC(id); + emitDispImm(emitDecodeBitMaskImm(bmi, size), false); + emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); break; - case IF_DV_2G: // fmov, fabs, fneg, fsqrt, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar) - switch (ins) + case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh) + if ((ins == INS_add) || (ins == INS_sub)) { - case INS_fmov: - // FP move, vector register - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + emitDispReg(encodingZRtoSP(id->idReg2()), size, true); + } + else + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + } + if (id->idIsReloc()) + { + assert(ins == INS_add); + printf("[LOW RELOC "); + emitDispImm((ssize_t)id->idAddr()->iiaAddr, false); + printf("]"); + } + else + { + emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); + } + break; - case INS_fabs: - case INS_fneg: + case IF_DI_2B: // DI_2B X........X.nnnnn ssssssnnnnnddddd Rd Rn imm(0-63) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispImm(emitGetInsSC(id), false); + break; - case INS_fcvtas: - case INS_fcvtau: - case INS_fcvtms: - case INS_fcvtmu: - case INS_fcvtns: - case INS_fcvtnu: - case INS_fcvtps: - case INS_fcvtpu: - case INS_fcvtzs: - case INS_fcvtzu: - case INS_scvtf: - case 
INS_ucvtf: + case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s) + if (ins == INS_ands) + { + emitDispReg(id->idReg1(), size, true); + } + else + { + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + } + emitDispReg(id->idReg2(), size, true); + bmi.immNRS = (unsigned)emitGetInsSC(id); + emitDispImm(emitDecodeBitMaskImm(bmi, size), false); + break; - case INS_frinta: - case INS_frinti: - case INS_frintm: - case INS_frintn: - case INS_frintp: - case INS_frintx: - case INS_frintz: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, ims (N,r,s) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); - case INS_fcvtxn: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + imm = emitGetInsSC(id); + bmi.immNRS = (unsigned)imm; + + switch (ins) + { + case INS_bfm: + case INS_sbfm: + case INS_ubfm: + emitDispImm(bmi.immR, true); + emitDispImm(bmi.immS, false); break; - case INS_fcmeq: - case INS_fcmge: - case INS_fcmgt: - case INS_fcmle: - case INS_fcmlt: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_bfi: + case INS_sbfiz: + case INS_ubfiz: + emitDispImm(getBitWidth(size) - bmi.immR, true); + emitDispImm(bmi.immS + 1, false); break; - case INS_frecpe: - case INS_frecpx: - case INS_frsqrte: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; + case INS_bfxil: + case INS_sbfx: + case INS_ubfx: + emitDispImm(bmi.immR, true); + emitDispImm(bmi.immS - bmi.immR + 1, false); break; - case INS_fsqrt: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_19C; - result.insLatency = PERFSCORE_LATENCY_22C; - } - else - { - // S-form - assert(id->idOpSize() == EA_4BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - 
result.insLatency = PERFSCORE_LATENCY_12C; - } + case INS_asr: + case INS_lsr: + case INS_lsl: + emitDispImm(imm, false); break; default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + assert(!"Unexpected instruction in IF_DI_2D"); } - break; - case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar) - case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_DV_2S: // addp (scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; + case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond + emitDispReg(id->idReg1(), size, true); + cfi.immCFVal = (unsigned)emitGetInsSC(id); + emitDispImm(cfi.imm5, true); + emitDispFlags(cfi.flags); + emitDispComma(); + emitDispCond(cfi.cond); break; - case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX - // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector) - switch (ins) - { - case INS_fmin: - case INS_fminnm: - case INS_fmax: - case INS_fmaxnm: - case INS_fabd: - case INS_fadd: - case INS_fsub: - case INS_fmul: - case INS_fmulx: - case INS_fmla: - case INS_fmls: - case INS_frecps: - case INS_frsqrts: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DR_1D: // DR_1D X............... 
cccc.......mmmmm Rd cond + emitDispReg(id->idReg1(), size, true); + cfi.immCFVal = (unsigned)emitGetInsSC(id); + emitDispCond(cfi.cond); + break; - case INS_faddp: - case INS_fmaxnmp: - case INS_fmaxp: - case INS_fminnmp: - case INS_fminp: - if (id->idOpSize() == EA_16BYTE) - { - // Q-form - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; + case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, false); + break; - case INS_facge: - case INS_facgt: - case INS_fcmeq: - case INS_fcmge: - case INS_fcmgt: - case INS_fcmle: - case INS_fcmlt: - if (id->idOpSize() == EA_16BYTE) - { - // Q-form - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - } - break; + case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63) + emitDispReg(id->idReg1(), size, true); + emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size); + break; - case INS_fdiv: - if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S)) - { - // S-form - result.insThroughput = PERFSCORE_THROUGHPUT_10C; - result.insLatency = PERFSCORE_LATENCY_13C; - } - else - { - // D-form - assert(id->idInsOpt() == INS_OPTS_2D); - result.insThroughput = PERFSCORE_THROUGHPUT_10C; - result.insLatency = PERFSCORE_LATENCY_22C; - } - break; + case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... 
Rn Rm ext(Rm) LSL imm(0-4) + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + imm = emitGetInsSC(id); + emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm); + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + cfi.immCFVal = (unsigned)emitGetInsSC(id); + emitDispCond(cfi.cond); break; - case IF_DV_3AI: // mul, mla, mls (vector by element) - case IF_DV_3BI: // fmul, fmulx, fmla, fmls (vector by element) - case IF_DV_3EI: // sqdmlal, sqdmlsl, sqdmulh, sqdmull (scalar by element) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm + case IF_DV_2U: // DV_2U ................ ......nnnnnddddd Sd Sn + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, false); break; - case IF_DV_4A: // fmadd, fmsub, fnmadd, fnsub (scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; + case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63) + emitDispReg(id->idReg1(), size, true); + emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size); break; - case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) - switch (ins) + case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + emitDispReg(encodingZRtoSP(id->idReg2()), size, false); + break; + + case IF_DR_2H: // DR_2H X........X...... 
......nnnnnddddd Rd Rn + if ((ins == INS_uxtb) || (ins == INS_uxth)) { - case INS_fadd: - case INS_fsub: - case INS_fabd: - case INS_fmax: - case INS_fmaxnm: - case INS_fmin: - case INS_fminnm: - case INS_fmul: - case INS_fmulx: - case INS_fnmul: - case INS_frecps: - case INS_frsqrts: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + // There is no 64-bit variant of uxtb and uxth + // However, we allow idOpSize() to have EA_8BYTE value for these instruction + emitDispReg(id->idReg1(), EA_4BYTE, true); + emitDispReg(id->idReg2(), EA_4BYTE, false); + } + else + { + emitDispReg(id->idReg1(), size, true); + // sxtb, sxth and sxtb always operate on 32-bit source register + emitDispReg(id->idReg2(), EA_4BYTE, false); + } + break; - case INS_facge: - case INS_facgt: - case INS_fcmeq: - case INS_fcmge: - case INS_fcmgt: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + cfi.immCFVal = (unsigned)emitGetInsSC(id); + emitDispFlags(cfi.flags); + emitDispComma(); + emitDispCond(cfi.cond); + break; - case INS_fdiv: - if (id->idOpSize() == EA_8BYTE) - { - // D-form - result.insThroughput = PERFSCORE_THROUGHPUT_6C; - result.insLatency = PERFSCORE_LATENCY_15C; - } - else - { - // S-form - assert(id->idOpSize() == EA_4BYTE); - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_10C; - } - break; + case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm + if ((ins == INS_add) || (ins == INS_sub)) + { + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + emitDispReg(encodingZRtoSP(id->idReg2()), size, true); + } + else if ((ins == INS_smulh) || (ins == INS_umulh)) + { + size = EA_8BYTE; + // smulh Xd, Xn, Xm + emitDispReg(id->idReg1(), size, true); + 
emitDispReg(id->idReg2(), size, true); + } + else if ((ins == INS_smull) || (ins == INS_umull) || (ins == INS_smnegl) || (ins == INS_umnegl)) + { + // smull Xd, Wn, Wm + emitDispReg(id->idReg1(), EA_8BYTE, true); + size = EA_4BYTE; + emitDispReg(id->idReg2(), size, true); + } + else + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + } - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + if (id->idIsLclVar()) + { + emitDispReg(codeGen->rsGetRsvdReg(), size, false); + } + else + { + emitDispReg(id->idReg3(), size, false); } + break; - case IF_DV_2H: // fmov, fcvtXX - to general - // fmov : FP transfer to general register - // fcvtaXX : FP convert from vector to general - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; + case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size); break; - case IF_DV_2I: // fmov, Xcvtf - from general - switch (ins) - { - case INS_fmov: - // FP transfer from general register - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4) + emitDispReg(encodingZRtoSP(id->idReg1()), size, true); + emitDispReg(encodingZRtoSP(id->idReg2()), size, true); + imm = emitGetInsSC(id); + emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm); + break; - case INS_scvtf: - case INS_ucvtf: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_5C; - break; + case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, true); + cfi.immCFVal = 
(unsigned)emitGetInsSC(id); + emitDispCond(cfi.cond); + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, true); + emitDispImm(emitGetInsSC(id), false); + break; + + case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra + if ((ins == INS_smaddl) || (ins == INS_smsubl) || (ins == INS_umaddl) || (ins == INS_umsubl)) + { + // smaddl Xd, Wn, Wm, Xa + emitDispReg(id->idReg1(), EA_8BYTE, true); + emitDispReg(id->idReg2(), EA_4BYTE, true); + emitDispReg(id->idReg3(), EA_4BYTE, true); + emitDispReg(id->idReg4(), EA_8BYTE, false); + } + else + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, true); + emitDispReg(id->idReg4(), size, false); } break; - case IF_DV_3C: // mov,and, bic, eor, mov,mvn, orn, bsl, bit, bif, - // tbl, tbx (vector) - switch (ins) + case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar) + elemsize = id->idOpSize(); + emitDispReg(id->idReg1(), elemsize, true); + emitDispFloatImm(emitGetInsSC(id)); + break; + + case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector) + imm = emitGetInsSC(id) & 0x0ff; + immShift = (emitGetInsSC(id) & 0x700) >> 8; + hasShift = (immShift != 0); + elemsize = optGetElemsize(id->idInsOpt()); + if (id->idInsOpt() == INS_OPTS_1D) { - case INS_tbl: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; - case INS_tbl_2regs: - result.insThroughput = PERFSCORE_THROUGHPUT_3X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_tbl_3regs: - result.insThroughput = PERFSCORE_THROUGHPUT_4X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - case INS_tbl_4regs: - result.insThroughput = 
PERFSCORE_THROUGHPUT_3X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_tbx: - result.insThroughput = PERFSCORE_THROUGHPUT_3X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_tbx_2regs: - result.insThroughput = PERFSCORE_THROUGHPUT_4X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - case INS_tbx_3regs: - result.insThroughput = PERFSCORE_THROUGHPUT_5X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_tbx_4regs: - result.insThroughput = PERFSCORE_THROUGHPUT_6X; - result.insLatency = PERFSCORE_LATENCY_5C; - break; - default: - // All other instructions - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + assert(elemsize == size); + emitDispReg(id->idReg1(), size, true); + } + else + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + } + if (ins == INS_fmov) + { + emitDispFloatImm(imm); + assert(hasShift == false); + } + else + { + if (elemsize == EA_8BYTE) + { + assert(ins == INS_movi); + ssize_t imm64 = 0; + const ssize_t mask8 = 0xFF; + for (unsigned b = 0; b < 8; b++) + { + if (imm & (ssize_t{1} << b)) + { + imm64 |= (mask8 << (b * 8)); + } + } + emitDispImm(imm64, hasShift, true); + } + else + { + emitDispImm(imm, hasShift, true); + } + if (hasShift) + { + insOpts opt = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL; + unsigned shift = (immShift & 0x3) * 8; + emitDispShiftOpts(opt); + emitDispImm(shift, false); + } } break; - case IF_DV_2E: // mov, dup (scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero) + elemsize = id->idOpSize(); + emitDispReg(id->idReg1(), elemsize, true); + emitDispFloatZero(); break; - case IF_DV_2F: // mov, ins (element) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2A: // DV_2A .Q.......X...... 
......nnnnnddddd Vd Vn (fabs, fcvt - vector) + if (emitInsIsVectorLong(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + else if (emitInsIsVectorNarrow(ins)) + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false); + } + else + { + assert(!emitInsIsVectorWide(ins)); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + if (ins == INS_fcmeq || ins == INS_fcmge || ins == INS_fcmgt || ins == INS_fcmle || ins == INS_fcmlt) + { + emitDispComma(); + emitDispFloatZero(); + } break; - case IF_DV_2B: // smov, umov - to general) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); break; - case IF_DV_2C: // mov, dup, ins - from general) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - if (ins == INS_dup) + case IF_DV_2M: // DV_2M .Q......XX...... 
......nnnnnddddd Vd Vn (abs, neg - vector) + if (emitInsIsVectorNarrow(ins)) { - result.insLatency = PERFSCORE_LATENCY_3C; + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), false); } else { - assert((ins == INS_ins) || (ins == INS_mov)); - result.insLatency = PERFSCORE_LATENCY_2C; + assert(!emitInsIsVectorLong(ins) && !emitInsIsVectorWide(ins)); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + if (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt) + { + emitDispComma(); + emitDispImm(0, false); } break; - case IF_DV_2D: // dup (dvector) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar) + elemsize = id->idOpSize(); + if (emitInsIsVectorLong(ins)) + { + emitDispReg(id->idReg1(), widenDatasize(elemsize), true); + emitDispReg(id->idReg2(), elemsize, true); + } + else if (emitInsIsVectorNarrow(ins)) + { + emitDispReg(id->idReg1(), elemsize, true); + emitDispReg(id->idReg2(), widenDatasize(elemsize), true); + } + else + { + assert(!emitInsIsVectorWide(ins)); + emitDispReg(id->idReg1(), elemsize, true); + emitDispReg(id->idReg2(), elemsize, true); + } + imm = emitGetInsSC(id); + emitDispImm(imm, false); break; - case IF_DV_3A: // (vector) - // add, sub, mul, mla, mls, cmeq, cmge, cmgt, cmhi, cmhs, ctst, - // pmul, saba, uaba, sabd, uabd, umin, uminp, umax, umaxp, smin, sminp, smax, smaxp - switch (ins) + case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector) + if ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2)) { - case INS_add: - case INS_sub: - case INS_cmeq: - case INS_cmge: - case INS_cmgt: - case INS_cmhi: - case INS_cmhs: - case INS_shadd: - case INS_shsub: - case INS_srhadd: 
- case INS_srshl: - case INS_sshl: - case INS_smax: - case INS_smaxp: - case INS_smin: - case INS_sminp: - case INS_umax: - case INS_umaxp: - case INS_umin: - case INS_uminp: - case INS_uhadd: - case INS_uhsub: - case INS_urhadd: - case INS_urshl: - case INS_ushl: - case INS_uzp1: - case INS_uzp2: - case INS_zip1: - case INS_zip2: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case INS_trn1: - case INS_trn2: - if (id->idInsOpt() == INS_OPTS_2D) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - } - - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case INS_addp: - case INS_cmtst: - case INS_pmul: - case INS_sabd: - case INS_sqadd: - case INS_sqsub: - case INS_uabd: - case INS_uqadd: - case INS_uqsub: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - - case INS_mla: - case INS_mls: - case INS_mul: - case INS_sqdmulh: - case INS_sqrdmulh: - case INS_sqrshl: - case INS_sqshl: - case INS_uqrshl: - case INS_uqshl: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - - case INS_saba: - case INS_uaba: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - - case INS_sdot: - case INS_udot: - result.insLatency = PERFSCORE_LATENCY_4C; - if (id->idOpSize() == EA_16BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - } - break; - - case INS_addhn: - case INS_addhn2: - case INS_sabdl: - case INS_sabdl2: - case INS_saddl2: - case INS_saddl: - case INS_saddw: - case INS_saddw2: - case INS_ssubl: - case INS_ssubl2: - case INS_ssubw: - case INS_ssubw2: - case INS_subhn: - case INS_subhn2: - case INS_uabdl: - case INS_uabdl2: - case INS_uaddl: - case INS_uaddl2: - case INS_uaddw: - case INS_uaddw2: - case INS_usubl: - 
case INS_usubl2: - case INS_usubw: - case INS_usubw2: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - - case INS_raddhn: - case INS_raddhn2: - case INS_rsubhn: - case INS_rsubhn2: - case INS_sabal: - case INS_sabal2: - case INS_uabal: - case INS_uabal2: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - - case INS_smlal: - case INS_smlal2: - case INS_smlsl: - case INS_smlsl2: - case INS_smull: - case INS_smull2: - case INS_sqdmlal: - case INS_sqdmlal2: - case INS_sqdmlsl: - case INS_sqdmlsl2: - case INS_sqdmull: - case INS_sqdmull2: - case INS_sqrdmlah: - case INS_sqrdmlsh: - case INS_umlal: - case INS_umlal2: - case INS_umlsl: - case INS_umlsl2: - case INS_umull: - case INS_umull2: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + assert((emitInsIsVectorLong(ins))); + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + else + { + if (emitInsIsVectorLong(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } + else if (emitInsIsVectorNarrow(ins)) + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); + } + else + { + assert(!emitInsIsVectorWide(ins)); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } - case INS_pmull: - case INS_pmull2: - if ((id->idInsOpt() == INS_OPTS_8B) || (id->idInsOpt() == INS_OPTS_16B)) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - else - { - // Crypto polynomial (64x64) multiply long - assert((id->idInsOpt() == INS_OPTS_1D) || (id->idInsOpt() == INS_OPTS_2D)); - 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - } - break; + imm = emitGetInsSC(id); + emitDispImm(imm, false); + } + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general) + srcsize = id->idOpSize(); + index = emitGetInsSC(id); + if (ins == INS_smov) + { + dstsize = EA_8BYTE; + } + else // INS_umov or INS_mov + { + dstsize = (srcsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE; } + emitDispReg(id->idReg1(), dstsize, true); + emitDispVectorRegIndex(id->idReg2(), srcsize, index, false); break; - case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by element) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general) + if (ins == INS_dup) + { + datasize = id->idOpSize(); + assert(isValidVectorDatasize(datasize)); + assert(isValidArrangement(datasize, id->idInsOpt())); + elemsize = optGetElemsize(id->idInsOpt()); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + } + else // INS_ins + { + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + assert(isValidVectorElemsize(elemsize)); + emitDispVectorRegIndex(id->idReg1(), elemsize, index, true); + } + emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? 
EA_8BYTE : EA_4BYTE, false); break; - case IF_DV_3E: // add, sub, cmeq, cmge, cmgt, cmhi, cmhs, ctst, (scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector) + datasize = id->idOpSize(); + assert(isValidVectorDatasize(datasize)); + assert(isValidArrangement(datasize, id->idInsOpt())); + elemsize = optGetElemsize(id->idInsOpt()); + index = emitGetInsSC(id); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorRegIndex(id->idReg2(), elemsize, index, false); break; - case IF_DV_3G: // ext - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar) + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + emitDispReg(id->idReg1(), elemsize, true); + emitDispVectorRegIndex(id->idReg2(), elemsize, index, false); break; - case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar) - case IF_DV_2M: // (vector) - // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt, - // addv, saddlv, uaddlv, smaxv, sminv, umaxv, uminv - // cls, clz, cnt, rbit, rev16, rev32, rev64, - // xtn, xtn2, shll, shll2 - switch (ins) - { - case INS_abs: - case INS_sqneg: - case INS_suqadd: - case INS_usqadd: - if (id->idOpSize() == EA_16BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - } + case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element) + imm = emitGetInsSC(id); + index = (imm >> 4) & 0xf; + index2 = imm & 0xf; + elemsize = id->idOpSize(); + emitDispVectorRegIndex(id->idReg1(), elemsize, index, true); + emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false); + break; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_DV_2G: // DV_2G .........X...... 
......nnnnnddddd Vd Vn (fmov, fcvtXX - register) + case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp) + case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar) + size = id->idOpSize(); + if ((ins == INS_fcmeq) || (ins == INS_fcmge) || (ins == INS_fcmgt) || (ins == INS_fcmle) || + (ins == INS_fcmlt)) + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispFloatZero(); + } + else if (emitInsIsVectorNarrow(ins)) + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), widenDatasize(size), false); + } + else + { + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, false); + } + if (fmt == IF_DV_2L && + (ins == INS_cmeq || ins == INS_cmge || ins == INS_cmgt || ins == INS_cmle || ins == INS_cmlt)) + { + emitDispComma(); + emitDispImm(0, false); + } + break; - case INS_addv: - case INS_saddlv: - case INS_uaddlv: - case INS_cls: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general) + case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov, Xcvtf - from general) + case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt) + dstsize = optGetDstsize(id->idInsOpt()); + srcsize = optGetSrcsize(id->idInsOpt()); - case INS_sminv: - case INS_smaxv: - case INS_uminv: - case INS_umaxv: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + emitDispReg(id->idReg1(), dstsize, true); + emitDispReg(id->idReg2(), srcsize, false); + break; - case INS_cmeq: - case INS_cmge: - case INS_cmgt: - case INS_cmle: - case INS_cmlt: + case IF_DV_2Q: // DV_2Q .........X...... ......nnnnnddddd Sd Vn (faddp, fmaxnmp, fmaxp, fminnmp, + // fminp - scalar) + case IF_DV_2R: // DV_2R .Q.......X...... 
......nnnnnddddd Sd Vn (fmaxnmv, fmaxv, fminnmv, fminv) + case IF_DV_2S: // DV_2S ........XX...... ......nnnnnddddd Sd Vn (addp - scalar) + case IF_DV_2T: // DV_2T .Q......XX...... ......nnnnnddddd Sd Vn (addv, saddlv, smaxv, sminv, uaddlv, + // umaxv, uminv) + if ((ins == INS_sadalp) || (ins == INS_saddlp) || (ins == INS_uadalp) || (ins == INS_uaddlp)) + { + emitDispVectorReg(id->idReg1(), optWidenDstArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + else + { + if ((ins == INS_saddlv) || (ins == INS_uaddlv)) + { + elemsize = optGetElemsize(optWidenDstArrangement(id->idInsOpt())); + } + else + { + elemsize = optGetElemsize(id->idInsOpt()); + } + emitDispReg(id->idReg1(), elemsize, true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), false); + } + break; - case INS_clz: - case INS_cnt: - case INS_rbit: - case INS_rev16: - case INS_rev32: - case INS_rev64: - case INS_xtn: - case INS_xtn2: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) + if ((ins == INS_sdot) || (ins == INS_udot)) + { + // sdot/udot Vd.2s, Vn.8b, Vm.8b + // sdot/udot Vd.4s, Vn.16b, Vm.16b + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + size = id->idOpSize(); + emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true); + emitDispVectorReg(id->idReg3(), (size == EA_8BYTE) ? 
INS_OPTS_8B : INS_OPTS_16B, false); + } + else if (((ins == INS_pmull) && (id->idInsOpt() == INS_OPTS_1D)) || + ((ins == INS_pmull2) && (id->idInsOpt() == INS_OPTS_2D))) + { + // pmull Vd.1q, Vn.1d, Vm.1d + // pmull2 Vd.1q, Vn.2d, Vm.2d + printf("%s.1q, ", emitVectorRegName(id->idReg1())); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + } + else if (emitInsIsVectorNarrow(ins)) + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg3(), optWidenElemsizeArrangement(id->idInsOpt()), false); + } + else + { + if (emitInsIsVectorLong(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } + else if (emitInsIsVectorWide(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); + } + else + { + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } - case INS_mvn: - case INS_not: - case INS_neg: - case INS_shll: - case INS_shll2: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_1C; - break; + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + } + break; - case INS_sqabs: - case INS_sqxtn: - case INS_sqxtn2: - case INS_sqxtun: - case INS_sqxtun2: - case INS_uqxtn: - case INS_uqxtn2: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) + if ((ins == INS_sdot) || (ins == INS_udot)) + { + // sdot/udot Vd.2s, Vn.8b, Vm.4b[index] + // sdot/udot Vd.4s, Vn.16b, Vm.4b[index] + emitDispVectorReg(id->idReg1(), id->idInsOpt(), 
true); + size = id->idOpSize(); + emitDispVectorReg(id->idReg2(), (size == EA_8BYTE) ? INS_OPTS_8B : INS_OPTS_16B, true); + index = emitGetInsSC(id); + printf("%s.4b[%d]", emitVectorRegName(id->idReg3()), (int)index); + } + else + { + if (emitInsIsVectorLong(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } + else if (emitInsIsVectorWide(ins)) + { + emitDispVectorReg(id->idReg1(), optWidenElemsizeArrangement(id->idInsOpt()), true); + emitDispVectorReg(id->idReg2(), optWidenElemsizeArrangement(id->idInsOpt()), true); + } + else + { + assert(!emitInsIsVectorNarrow(ins)); + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + } - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + elemsize = optGetElemsize(id->idInsOpt()); + index = emitGetInsSC(id); + emitDispVectorRegIndex(id->idReg3(), elemsize, index, false); } break; - case IF_DV_2N: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate - - // scalar) - case IF_DV_2O: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate - - // vector) - // sshll, sshll2, ushll, ushll2, shrn, shrn2, rshrn, rshrn2, sxrl, sxl2, uxtl, uxtl2 + case IF_DV_3B: // DV_3B .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + break; + + case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); switch (ins) { - case INS_shl: - case INS_shrn: - case INS_shrn2: - case INS_sli: - case INS_sri: - case INS_sshr: - case INS_ushr: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_2C; + case 
INS_tbl: + case INS_tbl_2regs: + case INS_tbl_3regs: + case INS_tbl_4regs: + case INS_tbx: + case INS_tbx_2regs: + case INS_tbx_3regs: + case INS_tbx_4regs: + registerListSize = insGetRegisterListSize(ins); + emitDispVectorRegList(id->idReg2(), registerListSize, INS_OPTS_16B, true); break; - - case INS_shll: - case INS_shll2: - case INS_sshll: - case INS_sshll2: - case INS_ushll: - case INS_ushll2: - case INS_sxtl: - case INS_sxtl2: - case INS_uxtl: - case INS_uxtl2: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_mov: break; - - case INS_rshrn: - case INS_rshrn2: - case INS_srshr: - case INS_sqshrn: - case INS_sqshrn2: - case INS_ssra: - case INS_urshr: - case INS_uqshrn: - case INS_uqshrn2: - case INS_usra: - if (id->idOpSize() == EA_16BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - } - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; - } + default: + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); break; + } + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + break; - case INS_srsra: - case INS_ursra: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DV_3BI: // DV_3BI .Q........Lmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by element) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + elemsize = optGetElemsize(id->idInsOpt()); + emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false); + break; - case INS_sqrshrn: - case INS_sqrshrn2: - case INS_sqrshrun: - case INS_sqrshrun2: - case INS_sqshrun: - case INS_sqshrun2: - case INS_sqshl: - case INS_sqshlu: - case INS_uqrshrn: - case INS_uqrshrn2: - case INS_uqshl: - if (id->idOpSize() == EA_16BYTE) - { - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - 
} - else - { - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_4C; - } - break; + case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, false); + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + case IF_DV_3E: // DV_3E ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (scalar) + if (emitInsIsVectorLong(ins)) + { + emitDispReg(id->idReg1(), widenDatasize(size), true); + } + else + { + assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins)); + emitDispReg(id->idReg1(), size, true); } + + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, false); break; - case IF_DV_2P: // aese, aesd, aesmc, aesimc, sha1su1, sha256su0 - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + if (emitInsIsVectorLong(ins)) + { + emitDispReg(id->idReg1(), widenDatasize(size), true); + } + else + { + assert(!emitInsIsVectorNarrow(ins) && !emitInsIsVectorWide(ins)); + emitDispReg(id->idReg1(), size, true); + } + emitDispReg(id->idReg2(), size, true); + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + emitDispVectorRegIndex(id->idReg3(), elemsize, index, false); break; - case IF_DV_3F: // sha1c, sha1m, sha1p, sha1su0, sha256h, sha256h2, sha256su1 (vector) - switch (ins) + case IF_DV_3F: // DV_3F ..........mmmmm ......nnnnnddddd Vd Vn Vm (vector) + if ((ins == INS_sha1c) || (ins == INS_sha1m) || (ins == INS_sha1p)) { - case INS_sha1su0: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + // Qd, Sn, Vm (vector) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), EA_4BYTE, true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); 
+ } + else if ((ins == INS_sha256h) || (ins == INS_sha256h2)) + { + // Qd Qn Vm (vector) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + } + else // INS_sha1su0, INS_sha256su1 + { + // Vd, Vn, Vm (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), false); + } + break; - case INS_sha256su0: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + elemsize = size; + emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false); + break; - case INS_sha1c: - case INS_sha1m: - case INS_sha1p: - case INS_sha256h: - case INS_sha256h2: - case INS_sha256su1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) + emitDispVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispVectorReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) + emitDispReg(id->idReg1(), size, true); + emitDispReg(id->idReg2(), size, true); + emitDispReg(id->idReg3(), size, true); + emitDispReg(id->idReg4(), size, false); + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + case IF_SN_0A: // SN_0A ................ ................ + if (ins == INS_align) + { + instrDescAlign* alignInstrId = (instrDescAlign*)id; + printf("[%d bytes", id->idIsEmptyAlign() ? 
0 : INSTR_ENCODED_SIZE); + + // targetIG is only set for 1st of the series of align instruction + if ((alignInstrId->idaLoopHeadPredIG != nullptr) && (alignInstrId->loopHeadIG() != nullptr)) + { + printf(" for IG%02u", alignInstrId->loopHeadIG()->igNum); + } + printf("]"); } break; - case IF_SI_0A: // brk imm16 - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; + case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16 + emitDispImm(emitGetInsSC(id), false); break; - case IF_SR_1A: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_1C; + case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier + emitDispBarrier((insBarrier)emitGetInsSC(id)); break; - case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv - switch (ins) + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) + if (ins == INS_mrs_tpid0) { - case INS_addv: - case INS_saddlv: - case INS_uaddlv: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + emitDispReg(id->idReg1(), size, true); + printf("tpidr_el0"); + } + else + { + emitDispReg(id->idReg1(), size, false); + } + break; - case INS_smaxv: - case INS_sminv: - case INS_umaxv: - case INS_uminv: - case INS_sha256h2: - case INS_sha256su1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + default: + // fallback to display SVE instructions. 
+ emitDispInsSveHelp(id); + break; + } - case INS_sadalp: - case INS_uadalp: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + if (id->idIsLclVar()) + { + printf("\t// "); + emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), + id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + if (id->idIsLclVarPair()) + { + printf(", "); + emitLclVarAddr* iiaLclVar2 = emitGetLclVarPairLclVar2(id); + emitDispFrameRef(iiaLclVar2->lvaVarNum(), iiaLclVar2->lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs2, + asmfm); + } + } - case INS_saddlp: - case INS_uaddlp: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + printf("\n"); +} - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; +/***************************************************************************** + * + * Display a stack frame reference. + */ - // SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3 +void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) +{ +#ifdef DEBUG + printf("["); - // Predicate logical - case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) - result.insLatency = PERFSCORE_LATENCY_1C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + if (varx < 0) + printf("TEMP_%02u", -varx); + else + emitComp->gtDispLclVar(+varx, false); - // Arithmetic, basic - case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) - // Max/min, basic and pairwise - case IF_SVE_AD_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer min/max/difference (predicated) - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + if (disp < 0) + printf("-0x%02x", -disp); + else if (disp > 0) + printf("+0x%02x", +disp); - // Divides, 32 bit (Note: worse for 64 bit) - case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) - result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 - result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7 - break; + printf("]"); - // Multiply, B, H, S element size (Note: D element size is slightly slower) - case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + if ((varx >= 0) && emitComp->opts.varNames && (((IL_OFFSET)offs) != BAD_IL_OFFSET)) + { + const char* varName = emitComp->compLocalVarName(varx, offs); - // Reduction, logical - case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) - result.insLatency = PERFSCORE_LATENCY_6C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + if (varName) + { + printf("'%s", varName); - case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + if (disp < 0) + printf("-%d", -disp); + else if (disp > 0) + printf("+%d", +disp); - // Reduction, arithmetic, D form (worse for B, S and H) - case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) - // Reduction, arithmetic, D form (worse for B, S and H) - case IF_SVE_AK_3A: // ........xx...... 
...gggnnnnnddddd -- SVE integer min/max reduction (predicated) - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + printf("'"); + } + } +#endif +} - case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) - switch (ins) +// Generate code for a load or store operation with a potentially complex addressing mode +// This method handles the case of a GT_IND with contained GT_LEA op1 of the x86 form [base + index*scale + offset] +// Since Arm64 does not directly support this complex of an addressing mode +// we may generates up to three instructions for this for Arm64 +// +void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir) +{ + GenTree* addr = indir->Addr(); + + if (addr->isContained()) + { + assert(addr->OperIs(GT_LCL_ADDR, GT_LEA) || (addr->IsIconHandle(GTF_ICON_TLS_HDL))); + + int offset = 0; + DWORD lsl = 0; + + if (addr->OperGet() == GT_LEA) + { + offset = addr->AsAddrMode()->Offset(); + if (addr->AsAddrMode()->gtScale > 0) { - case INS_sve_asr: - case INS_sve_lsl: - case INS_sve_lsr: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_srshr: - case INS_sve_sqshl: - case INS_sve_urshr: - case INS_sve_sqshlu: - case INS_sve_uqshl: - case INS_sve_asrd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + assert(isPow2(addr->AsAddrMode()->gtScale)); + BitScanForward(&lsl, addr->AsAddrMode()->gtScale); } - break; + } - // Arithmetic, shift - case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) - case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + GenTree* memBase = indir->Base(); - // Count/reverse bits - // Arithmetic, basic - // Floating point absolute value/difference - // Floating point arithmetic - // Logical - case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + if (indir->HasIndex()) + { + GenTree* index = indir->Index(); - case IF_SVE_AQ_3A: - switch (ins) + if (offset != 0) { - // Arithmetic, basic - case INS_sve_abs: - case INS_sve_neg: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + regNumber tmpReg = indir->GetSingleTempReg(); - // Extend, sign or zero - case INS_sve_sxtb: - case INS_sve_sxth: - case INS_sve_sxtw: - case INS_sve_uxtb: - case INS_sve_uxth: - case INS_sve_uxtw: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + emitAttr addType = varTypeIsGC(memBase) ? 
EA_BYREF : EA_PTRSIZE; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + if (emitIns_valid_imm_for_add(offset, EA_8BYTE)) + { + if (lsl > 0) + { + // Generate code to set tmpReg = base + index*scale + emitIns_R_R_R_I(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum(), lsl, + INS_OPTS_LSL); + } + else // no scale + { + // Generate code to set tmpReg = base + index + emitIns_R_R_R(INS_add, addType, tmpReg, memBase->GetRegNum(), index->GetRegNum()); + } - case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend - // (predicated) - case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand - // (predicated) - case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high - // (unpredicated) - case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) - case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) - case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) - case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - case 
IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) - case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_5C; - break; + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); - case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + // Then load/store dataReg from/to [tmpReg + offset] + emitIns_R_R_I(ins, attr, dataReg, tmpReg, offset); + } + else // large offset + { + // First load/store tmpReg with the large offset constant + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); + // Then add the base register + // rd = rd + base + emitIns_R_R_R(INS_add, addType, tmpReg, tmpReg, memBase->GetRegNum()); + + noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg)); + noway_assert(tmpReg != index->GetRegNum()); + + // Then load/store dataReg from/to [tmpReg + index*scale] + emitIns_R_R_R_I(ins, attr, dataReg, tmpReg, index->GetRegNum(), lsl, INS_OPTS_LSL); + } + } + else // (offset == 0) + { + if (lsl > 0) + { + // Then load/store dataReg from/to [memBase + index*scale] + emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum(), INS_OPTS_LSL, lsl); + } + else // no scale + { + if (index->OperIs(GT_BFIZ, GT_CAST) && index->isContained()) + { + // Then load/store dataReg from/to [memBase + index*scale with sign/zero extension] + GenTreeCast* cast; + int cns; + + if (index->OperIs(GT_BFIZ)) + { + cast = 
index->gtGetOp1()->AsCast(); + cns = (int)index->gtGetOp2()->AsIntCon()->IconValue(); + } + else + { + cast = index->AsCast(); + cns = 0; + } - case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + // For now, this code only supports extensions from i32/u32 + assert(cast->isContained()); - case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - switch (ins) - { - case INS_sve_fdot: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_bfdot: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(), + cast->IsUnsigned() ? 
INS_OPTS_UXTW : INS_OPTS_SXTW, cns); + } + else + { + // Then load/store dataReg from/to [memBase + index] + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum()); + } + } } - break; - - case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - switch (ins) + } + else // no Index register + { + if (addr->OperIs(GT_LCL_ADDR)) { - case INS_sve_fdot: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_bfdot: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); + unsigned lclNum = varNode->GetLclNum(); + unsigned offset = varNode->GetLclOffs(); + if (emitInsIsStore(ins)) + { + emitIns_S_R(ins, attr, dataReg, lclNum, offset); + } + else + { + emitIns_R_S(ins, attr, dataReg, lclNum, offset); + } } - break; - - case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long - switch (ins) + else if (addr->IsIconHandle(GTF_ICON_TLS_HDL)) { - case INS_sve_fmlalb: - case INS_sve_fmlalt: - case INS_sve_fmlslb: - case INS_sve_fmlslt: - case INS_sve_bfmlalb: - case INS_sve_bfmlalt: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_bfmlslb: - case INS_sve_bfmlslt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + // On Arm64, TEB is in r18, so load from the r18 as base. 
+ emitIns_R_R_I(ins, attr, dataReg, REG_R18, addr->AsIntCon()->IconValue()); } - break; - - case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations - switch (ins) + else if (emitIns_valid_imm_for_ldst_offset(offset, emitTypeSize(indir->TypeGet()))) { - case INS_sve_eor3: - case INS_sve_bcax: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_bsl: - case INS_sve_bsl1n: - case INS_sve_bsl2n: - case INS_sve_nbsl: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + // Then load/store dataReg from/to [memBase + offset] + emitIns_R_R_I(ins, attr, dataReg, memBase->GetRegNum(), offset); } - break; - - case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) - case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) - case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) - case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) - case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads - case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp - case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) - case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) - case IF_SVE_HL_3B: // ................ 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long - case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp - case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product - case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - - case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) - case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments - case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments - case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) - case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long - case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide - case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved - case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long - case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part - case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) - case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) - case IF_SVE_GI_4A: // ........xx.mmmmm 
...gggnnnnnddddd -- SVE2 histogram generation (vector) - case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment - case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + else + { + // We require a tmpReg to hold the offset + regNumber tmpReg = indir->GetSingleTempReg(); - case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_6C; - break; + // First load/store tmpReg with the large offset constant + codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset); - case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long - switch (ins) - { - case INS_sve_smullb: - case INS_sve_smullt: - case INS_sve_umullb: - case INS_sve_umullt: - case INS_sve_sqdmullb: - case INS_sve_sqdmullt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_pmullb: - case INS_sve_pmullt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + // Then load/store dataReg from/to [memBase + tmpReg] + emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), tmpReg); } - break; + } + } + else // addr is not contained, so we evaluate it into a register + { +#ifdef DEBUG + if (addr->OperIs(GT_LCL_ADDR)) + { + // If the local var is a gcref or byref, the local var better be untracked, because we have + // no logic here to track local variable lifetime changes, like we do in the contained case + // above. E.g., for a `str r0,[r1]` for byref `r1` to local `V01`, we won't store the local + // `V01` and so the emitter can't update the GC lifetime for `V01` if this is a variable birth. 
+ LclVarDsc* varDsc = emitComp->lvaGetDesc(addr->AsLclVarCommon()); + assert(!varDsc->lvTracked); + } +#endif // DEBUG - case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register - // increment) - case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate - // increment) - case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register - // increment) - case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate - // increment) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_8C; - break; + // Then load/store dataReg from/to [addrReg] + emitIns_R_R(ins, attr, dataReg, addr->GetRegNum()); + } +} - case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation - case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. 
- case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count - case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count - case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count - case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count - case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count - case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) - case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) - case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) - case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) - case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; +regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src) +{ + // dst can only be a reg + assert(!dst->isContained()); - case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient - case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + // src can be immed or reg + assert(!src->isContained() || src->isContainedIntOrIImmed()); - case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) - case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long - case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) - case 
IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + // find immed (if any) - it cannot be a dst + GenTreeIntConCommon* intConst = nullptr; + if (src->isContainedIntOrIImmed()) + { + intConst = src->AsIntConCommon(); + } - case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) - case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + if (intConst) + { + emitIns_R_I(ins, attr, dst->GetRegNum(), intConst->IconValue()); + return dst->GetRegNum(); + } + else + { + emitIns_R_R(ins, attr, dst->GetRegNum(), src->GetRegNum()); + return dst->GetRegNum(); + } +} - case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; +// The callee must call genConsumeReg() for any non-contained srcs +// and genProduceReg() for any non-contained dsts. - case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector - case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector - case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector - case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector - case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector - case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector - case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector - case IF_SVE_CF_2D: // .............ii. 
.......NNNNddddd -- SVE move predicate into vector - result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo currently undocumented - result.insLatency = PERFSCORE_LATENCY_140C; - break; +regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2) +{ + // dst can only be a reg + assert(!dst->isContained()); - case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register - case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_5C; - break; + // find immed (if any) - it cannot be a dst + // Only one src can be an int. + GenTreeIntConCommon* intConst = nullptr; + GenTree* nonIntReg = nullptr; - case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements - case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements - case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + if (varTypeIsFloating(dst)) + { + // src1 can only be a reg + assert(!src1->isContained()); + // src2 can only be a reg + assert(!src2->isContained()); + } + else // not floating point + { + // src2 can be immed or reg + assert(!src2->isContained() || src2->isContainedIntOrIImmed()); - // Conditional extract operations, SIMD&FP scalar and vector forms - case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements - case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector - case IF_SVE_CN_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + // Check src2 first as we can always allow it to be a contained immediate + if (src2->isContainedIntOrIImmed()) + { + intConst = src2->AsIntConCommon(); + nonIntReg = src1; + } + // Only for commutative operations do we check src1 and allow it to be a contained immediate + else if (dst->OperIsCommutative()) + { + // src1 can be immed or reg + assert(!src1->isContained() || src1->isContainedIntOrIImmed()); + + // Check src1 and allow it to be a contained immediate + if (src1->isContainedIntOrIImmed()) + { + assert(!src2->isContainedIntOrIImmed()); + intConst = src1->AsIntConCommon(); + nonIntReg = src2; + } + } + else + { + // src1 can only be a reg + assert(!src1->isContained()); + } + } + + bool isMulOverflow = false; + if (dst->gtOverflowEx()) + { + if ((ins == INS_add) || (ins == INS_adds)) + { + ins = INS_adds; + } + else if ((ins == INS_sub) || (ins == INS_subs)) + { + ins = INS_subs; + } + else if (ins == INS_mul) + { + isMulOverflow = true; + assert(intConst == nullptr); // overflow format doesn't support an int constant operand + } + else + { + assert(!"Invalid ins for overflow check"); + } + } + if (intConst != nullptr) + { + emitIns_R_R_I(ins, attr, dst->GetRegNum(), nonIntReg->GetRegNum(), intConst->IconValue()); + } + else + { + if (isMulOverflow) + { + regNumber extraReg = dst->GetSingleTempReg(); + assert(extraReg != dst->GetRegNum()); - // Conditional extract operations, scalar form - case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register - result.insLatency = PERFSCORE_LATENCY_8C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + if ((dst->gtFlags & GTF_UNSIGNED) != 0) + { + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. 
+ emitIns_R_R_R(INS_umull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - // Copy, scalar SIMD&FP or imm - case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector - // (predicated) - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + // Get the high result by shifting dst. + emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32); + } + else + { + assert(attr == EA_8BYTE); + // Compute the high result. + emitIns_R_R_R(INS_umulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum()); - // Copy, scalar - case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated) - result.insLatency = PERFSCORE_LATENCY_5C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } - case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords - result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo Currently undocumented. - result.insLatency = PERFSCORE_LATENCY_140C; - break; + // zero-sign bit comparison to detect overflow. + emitIns_R_I(INS_cmp, attr, extraReg, 0); + } + else + { + int bitShift = 0; + if (attr == EA_4BYTE) + { + // Compute 8 byte results from 4 byte by 4 byte multiplication. + emitIns_R_R_R(INS_smull, EA_8BYTE, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) - case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive) - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + // Get the high result by shifting dst. 
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->GetRegNum(), 32); - case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + bitShift = 31; + } + else + { + assert(attr == EA_8BYTE); + // Save the high result in a temporary register. + emitIns_R_R_R(INS_smulh, attr, extraReg, src1->GetRegNum(), src2->GetRegNum()); - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors - case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate - case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate - case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) - case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) - case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) - case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) - case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) - case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) - case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) - case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE 
integer dot product (unpredicated) - case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long - case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long - case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long - case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate - case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + // Now multiply without skewing the high result. + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); - case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations - switch (ins) - { - case INS_sve_rax1: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_sm4ekey: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + bitShift = 63; + } - case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) - switch (ins) - { - case INS_sve_fmlalb: - case INS_sve_fmlalt: - case INS_sve_fmlslb: - case INS_sve_fmlslt: - case INS_sve_bfmlalb: - case INS_sve_bfmlalt: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_bfmlslb: - case INS_sve_bfmlslt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + // Sign bit comparison to detect overflow. 
+ emitIns_R_R_I(INS_cmp, attr, extraReg, dst->GetRegNum(), bitShift, INS_OPTS_ASR); } - break; - - case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + } + else + { + // We can just multiply. + emitIns_R_R_R(ins, attr, dst->GetRegNum(), src1->GetRegNum(), src2->GetRegNum()); + } + } - case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations - switch (ins) - { - case INS_sve_mov: - case INS_sve_and: - case INS_sve_orr: - case INS_sve_eor: - case INS_sve_bic: - case INS_sve_orn: - case INS_sve_not: - case INS_sve_sel: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + if (dst->gtOverflowEx()) + { + assert(!varTypeIsFloating(dst)); + codeGen->genCheckOverflow(dst); + } - case INS_sve_bics: - case INS_sve_eors: - case INS_sve_nots: - case INS_sve_ands: - case INS_sve_orrs: - case INS_sve_orns: - case INS_sve_nors: - case INS_sve_nands: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + return dst->GetRegNum(); +} - case INS_sve_nor: - case INS_sve_nand: - result.insLatency = PERFSCORE_LATENCY_1C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; +#if defined(DEBUG) || defined(LATE_DISASM) - case INS_sve_movs: - result.insLatency = PERFSCORE_LATENCY_1C; - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - break; +void emitter::getMemoryOperation(instrDesc* id, unsigned* pMemAccessKind, bool* pIsLocalAccess) +{ + unsigned memAccessKind = PERFSCORE_MEMORY_NONE; + bool isLocalAccess = false; + 
instruction ins = id->idIns(); - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + if (emitInsIsLoadOrStore(ins)) + { + if (emitInsIsLoad(ins)) + { + if (emitInsIsStore(ins)) + { + memAccessKind = PERFSCORE_MEMORY_READ_WRITE; } - break; - - case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition - case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition - switch (ins) + else { - case INS_sve_brkpa: - case INS_sve_brkpb: - case INS_sve_brkn: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - case INS_sve_brkpas: - case INS_sve_brkpbs: - case INS_sve_brkns: - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + memAccessKind = PERFSCORE_MEMORY_READ; } - break; - - case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + } + else + { + assert(emitInsIsStore(ins)); + memAccessKind = PERFSCORE_MEMORY_WRITE; + } - case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + insFormat insFmt = id->idInsFmt(); - case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + switch (insFmt) + { + case IF_LS_1A: + isLocalAccess = true; + break; - case IF_SVE_DE_1A: // ........xx...... 
......ppppp.DDDD -- SVE predicate initialize - switch (ins) - { - case INS_sve_ptrue: - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + case IF_LS_2A: + case IF_LS_2B: + case IF_LS_2C: + case IF_LS_2D: + case IF_LS_2E: + case IF_LS_2F: + case IF_LS_2G: + case IF_LS_3A: + case IF_LS_3F: + case IF_LS_3G: + if (isStackRegister(id->idReg2())) + { + isLocalAccess = true; + } + break; - case INS_sve_ptrues: - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + case IF_LS_3B: + case IF_LS_3C: + case IF_LS_3D: + case IF_LS_3E: + if (isStackRegister(id->idReg3())) + { + isLocalAccess = true; + } + break; + case IF_LARGELDC: + isLocalAccess = false; + break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + default: + assert(!"Logic Error"); + memAccessKind = PERFSCORE_MEMORY_NONE; + break; + } + } - case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + *pMemAccessKind = memAccessKind; + *pIsLocalAccess = isLocalAccess; +} - case IF_SVE_DG_2A: // ................ 
.......gggg.DDDD -- SVE predicate read from FFR (predicated) - switch (ins) - { - case INS_sve_rdffr: - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; +//---------------------------------------------------------------------------------------- +// getInsExecutionCharacteristics: +// Returns the current instruction execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// +// Return Value: +// A struct containing the current instruction execution characteristics +// +// Notes: +// The instruction latencies and throughput values returned by this function +// are from +// +// The Arm Cortex-A55 Software Optimization Guide: +// https://static.docs.arm.com/epm128372/20/arm_cortex_a55_software_optimization_guide_v2.pdf +// +emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(instrDesc* id) +{ + insExecutionCharacteristics result; + instruction ins = id->idIns(); + insFormat insFmt = id->idInsFmt(); - case INS_sve_rdffrs: - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + unsigned memAccessKind; + bool isLocalAccess; + getMemoryOperation(id, &memAccessKind, &isLocalAccess); - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + result.insThroughput = PERFSCORE_THROUGHPUT_ILLEGAL; + result.insLatency = PERFSCORE_LATENCY_ILLEGAL; - case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; + // Initialize insLatency based upon the instruction's memAccessKind and local access values + // + if (memAccessKind == PERFSCORE_MEMORY_READ) + { + result.insLatency = isLocalAccess ? 
PERFSCORE_LATENCY_RD_STACK : PERFSCORE_LATENCY_RD_GENERAL; + } + else if (memAccessKind == PERFSCORE_MEMORY_WRITE) + { + result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_WR_STACK : PERFSCORE_LATENCY_WR_GENERAL; + } + else if (memAccessKind == PERFSCORE_MEMORY_READ_WRITE) + { + result.insLatency = isLocalAccess ? PERFSCORE_LATENCY_RD_WR_STACK : PERFSCORE_LATENCY_RD_WR_GENERAL; + } - case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + switch (insFmt) + { + // + // Branch Instructions + // - case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test + case IF_BI_0A: // b, bl_local + case IF_BI_0C: // bl, b_tail + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // but is Dual Issue result.insLatency = PERFSCORE_LATENCY_1C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; - - case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; - - case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match - case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - // Extract/insert operation, SIMD and FP scalar form - case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; - // Extract/insert operation, scalar - case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register - result.insLatency = PERFSCORE_LATENCY_5C; + case IF_BI_0B: // beq, bne, bge, blt, bgt, ble, ... 
+ case IF_BI_1A: // cbz, cbnz + case IF_BI_1B: // tbz, tbnz result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - // Count/reverse bits - // Reverse, vector - case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; - - // Arithmetic, pairwise add - // Max/min, basic and pairwise - case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_LARGEJMP: // bcc + b + result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; break; - case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) - switch (ins) + case IF_BR_1B: // blr, br_tail + if (ins == INS_blr) { - // Arithmetic, complex - case INS_sve_sqabs: - case INS_sve_sqneg: - // Reciprocal estimate - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; - - // Reciprocal estimate - case INS_sve_urecpe: - case INS_sve_ursqrte: - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; - - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + break; } - break; - - // Arithmetic, complex - case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; - - // Arithmetic, shift complex - case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left - // (predicated) - // Arithmetic, pairwise add and accum long - case IF_SVE_EQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long - case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - - case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements + // otherwise we should have a br_tail instruction + assert(ins == INS_br_tail); + FALLTHROUGH; + case IF_BR_1A: // ret, br result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - - // Floating point arithmetic - // Floating point min/max pairwise - case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - // Floating point reduction, F64. (Note: Worse for F32 and F16) - case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction - result.insLatency = PERFSCORE_LATENCY_2C; - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - break; + // + // Arithmetic and logical instructions + // - // Floating point associative add, F64. (Note: Worse for F32 and F16) - case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; + // ALU, basic + case IF_DR_3A: // add, adds, adc, adcs, and, ands, bic, bics, + // eon, eor, orn, orr, sub, subs, sbc, sbcs + // asr, asrv, lsl, lslv, lsr, lsrv, ror, rorv + // sdiv, udiv, mul, smull, smulh, umull, umulh, mneg + case IF_DR_2A: // cmp, cmn, tst - case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) switch (ins) { - case INS_sve_frecps: - case INS_sve_frsqrts: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - - case INS_sve_fmul: - case INS_sve_ftsmul: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case INS_mul: + case INS_smull: + case INS_umull: + case INS_mneg: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_fadd: - case INS_sve_fsub: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_smulh: + case INS_umulh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; + case INS_sdiv: + case INS_udiv: + if (id->idOpSize() == EA_4BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_12C; + break; + } + else + { + assert(id->idOpSize() == EA_8BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_20C; + break; + } - case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) - switch (ins) - { - // Floating point absolute value/difference - case INS_sve_fabd: - // Floating point min/max - case INS_sve_fmax: - case INS_sve_fmaxnm: - case INS_sve_fmin: - case INS_sve_fminnm: - // Floating point arithmetic - case INS_sve_fadd: - case INS_sve_fsub: - case INS_sve_fsubr: - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_add: + case INS_adds: + case INS_adc: + case INS_adcs: + case INS_and: + case INS_ands: + case INS_bic: + case INS_bics: + case INS_eon: + case INS_eor: + case INS_orn: + case INS_orr: + case INS_sub: + case INS_subs: + case INS_sbc: + case INS_sbcs: + case INS_asr: + case INS_lsl: + case INS_lsr: + case INS_ror: + case INS_cmp: + case INS_cmn: + case INS_tst: result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - // Floating point divide, F64 (Note: Worse for F32, F16) - case INS_sve_fdiv: - case INS_sve_fdivr: - result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 - result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7 + case INS_asrv: + case INS_lslv: + case INS_lsrv: + case INS_rorv: + // variable shift by register + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - // Floating point multiply - case INS_sve_fmul: - case INS_sve_fmulx: - case INS_sve_fscale: - result.insLatency = PERFSCORE_LATENCY_3C; + case INS_crc32b: + case INS_crc32h: + case INS_crc32cb: + case INS_crc32ch: + case INS_crc32x: + case INS_crc32cx: result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case INS_crc32w: + case INS_crc32cw: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_famax: - case INS_sve_famin: - result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder - result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + case 
INS_smaddl: + case INS_smsubl: + case INS_smnegl: + case INS_umaddl: + case INS_umsubl: + case INS_umnegl: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; break; default: @@ -20976,731 +14830,691 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision - case IF_SVE_HO_3B: - case IF_SVE_HO_3C: - case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; + // ALU, basic immediate + case IF_DI_1A: // cmp, cmn + case IF_DI_1C: // tst + case IF_DI_1D: // mov reg, imm(N,r,s) + case IF_DI_1E: // adr, adrp + case IF_DI_1F: // ccmp, ccmn + case IF_DI_2A: // add, adds, suv, subs + case IF_DI_2C: // and, ands, eor, orr - // Floating point round to integral, F64. (Note: Worse for F32 and F16) - case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value - result.insLatency = PERFSCORE_LATENCY_3C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations - switch (ins) - { - // Floating point reciprocal estimate, F64. (Note: Worse for F32 and F16) - case INS_sve_frecpx: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_1C; - break; - - // Floating point square root F64. 
(Note: Worse for F32 and F16) - case INS_sve_fsqrt: - result.insThroughput = PERFSCORE_THROUGHPUT_16C; - result.insLatency = PERFSCORE_LATENCY_14C; - break; + case IF_DR_2D: // cinc, cinv, cneg + case IF_DR_2E: // mov, neg, mvn, negs + case IF_DI_1B: // mov, movk, movn, movz - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point - result.insThroughput = PERFSCORE_THROUGHPUT_4X; - result.insLatency = PERFSCORE_LATENCY_6C; - break; + case IF_LARGEADR: // adrp + add + case IF_LARGELDC: // adrp + ldr - case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count - case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count - case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count - case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_7C; + // ALU, shift by immediate + case IF_DR_3B: // add, adds, and, ands, bic, bics, + // eon, eor, orn, orr, sub, subs + case IF_DR_2B: // cmp, cmn, tst + case IF_DR_2F: // neg, negs, mvn + case IF_DI_2B: // ror + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise - case IF_SVE_DR_1A: // ................ .......NNNN..... 
-- SVE FFR write from predicate - result.insThroughput = PERFSCORE_THROUGHPUT_1C; + // ALU, extend, scale + case IF_DR_3C: // add, adc, and, bic, eon, eor, orn, orr, sub, sbc + case IF_DR_2C: // cmp + case IF_DV_2U: // sha1h + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_2C; break; + // ALU, Conditional select + case IF_DR_1D: // cset, csetm + case IF_DR_3D: // csel, csinc, csinv, csneg - case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter - case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars - result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element - case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow - case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + // ALU, Conditional compare + case IF_DR_2I: // ccmp , ccmn + + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_HG_2A: // ................ 
......nnnn.ddddd -- SVE2 FP8 downconverts - switch (ins) + // Multiply accumulate + case IF_DR_4A: // madd, msub, smaddl, smsubl, umaddl, umsubl + if (id->idOpSize() == EA_4BYTE) { - case INS_sve_fcvtnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - case INS_sve_fcvtn: - case INS_sve_bfcvtn: - case INS_sve_fcvtnb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + } + else + { + assert(id->idOpSize() == EA_8BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_5C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; } + + // Miscellaneous Data Preocessing instructions + case IF_DR_3E: // extr + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - // Not available in Arm Neoverse N2 Software Optimization Guide. - case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) - case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) - case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) - case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) - result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder - result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + case IF_DR_2H: // sxtb, sxth, sxtw, uxtb, uxth, sha1h + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - // Not available in Arm Neoverse N2 Software Optimization Guide. 
- case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow - result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder - result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + case IF_DI_2D: // lsl, lsr, asr, sbfm, bfm, ubfm, sbfiz, bfi, ubfiz, sbfx, bfxil, ubfx + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow - case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) - case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) - case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + case IF_DR_2G: // mov sp, cls, clz, rbit, rev16, rev32, rev + if (ins == INS_rbit) + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; + break; + } + + // + // Load/Store Instructions + // + + case IF_LS_1A: // ldr, ldrsw (literal, pc relative immediate) result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations - case IF_SVE_GL_1A: // ................ 
...........ddddd -- SVE2 crypto unary operations - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; + case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate) + // ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh, + // ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh - case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit - case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate - // pair) - case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit - // (predicate-as-counter) - case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; + // ToDo: store release have 2/4 cycle latency break; - case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue - case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) - case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) - case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) - case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) - case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add - case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_LS_2B: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (scaled immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; - case IF_SVE_ED_1A: // ........xx...... 
...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) - switch (ins) - { - case INS_sve_umin: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - } + case IF_LS_2C: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh + // ldur, ldurb, ldurh, ldursb, ldursh, ldursw, stur, sturb, sturh + result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; - case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) - case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) - case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) - case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_5C; - break; - - case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus - // immediate) - case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_F: // 
............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) - case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_LS_3A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb strh (register extend, scale 2,4,8) result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; break; - case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) - case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus - // immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; + case IF_LS_3B: // ldp, ldpsw, ldnp, stp, stnp (load/store pair zero offset) + case IF_LS_3C: // load/store pair with offset pre/post inc + if (memAccessKind == PERFSCORE_MEMORY_READ) + { + // ldp, ldpsw, ldnp + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + if (emitIGisInEpilog(emitCurIG) && (ins == INS_ldp)) + { + // Reduce latency for ldp instructions in the epilog + // + result.insLatency = PERFSCORE_LATENCY_2C; + } + else if (id->idOpSize() == EA_8BYTE) // X-form + { + // the X-reg variant has an extra cycle of latency + // and two cycle throughput + result.insLatency += 1.0; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + } + } + else // store instructions + { + // stp, stnp + assert(memAccessKind == PERFSCORE_MEMORY_WRITE); + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + } break; - case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus - // immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; + 
case IF_LS_3D: // stxr, stxrb, stxrh, stlxr, stlxrb, srlxrh + // Store exclusive register, returning status + assert(emitInsIsStore(ins)); + // @ToDo - find out the actual latency + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = max(PERFSCORE_LATENCY_4C, result.insLatency); break; - case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus - // immediate) - switch (ins) + case IF_LS_3E: // ARMv8.1 LSE Atomics + if (memAccessKind == PERFSCORE_MEMORY_WRITE) { - case INS_sve_ld1rqb: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1rob: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqh: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1roh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqw: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1row: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqd: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1rod: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + // staddb, staddlb, staddh, staddlh, stadd. 
staddl + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; } - break; - - case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // immediate) - switch (ins) + else { - case INS_sve_ld2q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld3q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld4q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + assert(memAccessKind == PERFSCORE_MEMORY_READ_WRITE); + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = max(PERFSCORE_LATENCY_3C, result.insLatency); } break; - case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_LS_2D: + case IF_LS_2E: + case IF_LS_3F: + // Load/Store multiple structures + // Load single structure and replicate switch (ins) { - case INS_sve_ld2b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3b: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4b: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3h: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4h: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = 
PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3w: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4w: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2d: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3d: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_10C; + case INS_ld1: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_ld4d: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; + + case INS_ld1_2regs: + case INS_ld2: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + + case INS_ld1_3regs: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_5C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_6C; + result.insLatency = PERFSCORE_LATENCY_8C; + } break; - } - break; - case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus 
- // immediate) - switch (ins) - { - case INS_sve_st2q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_st3q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_st4q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + case INS_ld1_4regs: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_8C; + result.insLatency = PERFSCORE_LATENCY_10C; + } break; - } - break; - - case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long - case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // immediate) - case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow - switch (ins) - { - case INS_sve_sqshrunb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_sqshrunt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_sqrshrunb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_sqrshrunt: - 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_shrnb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_ld3: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + if (optGetElemsize(id->idInsOpt()) == EA_4BYTE) + { + // S + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_5C; + } + else + { + // B/H + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) || + (optGetElemsize(id->idInsOpt()) == EA_8BYTE)) + { + // S/D + result.insThroughput = PERFSCORE_THROUGHPUT_6C; + result.insLatency = PERFSCORE_LATENCY_8C; + } + else + { + // B/H + result.insThroughput = PERFSCORE_THROUGHPUT_7C; + result.insLatency = PERFSCORE_LATENCY_9C; + } + } break; - case INS_sve_shrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + + case INS_ld4: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + if (optGetElemsize(id->idInsOpt()) == EA_4BYTE) + { + // S + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } + else + { + // B/H + result.insThroughput = PERFSCORE_THROUGHPUT_5C; + result.insLatency = PERFSCORE_LATENCY_7C; + } + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + if ((optGetElemsize(id->idInsOpt()) == EA_4BYTE) || + (optGetElemsize(id->idInsOpt()) == EA_8BYTE)) + { + // S/D + result.insThroughput = PERFSCORE_THROUGHPUT_8C; + result.insLatency = PERFSCORE_LATENCY_10C; + } + else + { + // B/H + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + } + } break; - case INS_sve_rshrnb: + + case INS_ld1r: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + result.insLatency = 
PERFSCORE_LATENCY_3C; break; - case INS_sve_rshrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_ld2r: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } break; - case INS_sve_sqshrnb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_ld3r: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_5C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_sqshrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_ld4r: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_sqrshrnb: + + case INS_st1: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_sqrshrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_st1_2regs: + case INS_st2: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + } break; - case INS_sve_uqshrnb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - 
result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_st1_3regs: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_3C; + } break; - case INS_sve_uqshrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_st1_4regs: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + else + { + // Q-form + assert(id->idOpSize() == EA_16BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_uqrshrnb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_st3: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_uqrshrnt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_st4: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_3C; + } + else + { + assert(id->idOpSize() == EA_16BYTE); + if (optGetElemsize(id->idInsOpt()) == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_5C; + result.insLatency = PERFSCORE_LATENCY_5C; + } + } break; + default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + unreached(); } break; - case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + case IF_LS_2F: + case IF_LS_2G: + case IF_LS_3G: + // 
Load/Store single structure switch (ins) { - case INS_sve_st2b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_st3b: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4b: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_11C; - break; - case INS_sve_st2h: + case INS_ld1: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_st3h: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4h: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_11C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_st2w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + + case INS_ld2: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } break; - case INS_sve_st3w: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_7C; + + case INS_ld3: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_5C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_st4w: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_11C; + + case INS_ld4: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_4C; + result.insLatency = PERFSCORE_LATENCY_6C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_2C; 
+ result.insLatency = PERFSCORE_LATENCY_4C; + } break; - case INS_sve_st2d: + + case INS_st1: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_st3d: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_7C; + + case INS_st2: + if (id->idOpSize() == EA_8BYTE) + { + // D + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + else + { + // B/H/S + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + } break; - case INS_sve_st4d: - result.insThroughput = PERFSCORE_THROUGHPUT_9C; - result.insLatency = PERFSCORE_LATENCY_11C; + + case INS_st3: + case INS_st4: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; break; + default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + unreached(); } break; - case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit - // unscaled offsets) - case 
IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_SN_0A: // nop, yield, align + + if (id->idIns() == INS_align) + { + if ((id->idInsOpt() == INS_OPTS_NONE) || ((instrDescAlign*)id)->isPlacedAfterJmp) + { + // Either we're not going to generate 'align' instruction, or the 'align' + // instruction is placed immediately after unconditional jmp. + // In both cases, don't count for PerfScore. + + result.insThroughput = PERFSCORE_THROUGHPUT_ZERO; + result.insLatency = PERFSCORE_LATENCY_ZERO; + break; + } + } + else if (ins == INS_yield) + { + // @ToDo - find out the actual latency, match x86/x64 for now + result.insThroughput = PERFSCORE_THROUGHPUT_140C; + result.insLatency = PERFSCORE_LATENCY_140C; + } + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_ZERO; break; - case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case 
IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled - // offsets) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; + case IF_SI_0B: // dmb, dsb, isb + // @ToDo - find out the actual latency + result.insThroughput = PERFSCORE_THROUGHPUT_10C; + result.insLatency = PERFSCORE_LATENCY_10C; break; - case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) - case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus - // scalar) + case IF_DV_2J: // fcvt Vd Vn result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) - case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus - // scalar) - case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) - case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case 
IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) - case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_DV_2K: // fcmp Vd Vn result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + case IF_DV_1A: // fmov - immediate (scalar) result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) - switch (ins) - { - case INS_sve_ld1rqb: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1rob: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqh: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1roh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqw: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case INS_sve_ld1row: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld1rqd: - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - case 
INS_sve_ld1rod: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + case IF_DV_1B: // fmov, orr, bic, movi, mvni (immediate vector) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus - // scalar) - switch (ins) - { - case INS_sve_ld2q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld3q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_ld4q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } + case IF_DV_1C: // fcmp vn, #0.0 + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + case IF_DV_2A: // fabs, fneg, fsqrt, fcvtXX, frintX, scvtf, ucvtf, fcmXX (vector) switch (ins) { - case INS_sve_ld2b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3b: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4b: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3h: - 
result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4h: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2w: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - case INS_sve_ld3w: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld4w: + case INS_fabs: + case INS_fneg: result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case INS_sve_ld2d: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; + result.insLatency = (id->idOpSize() == EA_8BYTE) ? PERFSCORE_LATENCY_2C : PERFSCORE_LATENCY_3C / 2; break; - case INS_sve_ld3d: - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; + + case INS_fsqrt: + if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S)) + { + // S-form + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_11C; + } + else + { + // D-form + assert(id->idInsOpt() == INS_OPTS_2D); + result.insThroughput = PERFSCORE_THROUGHPUT_6C; + result.insLatency = PERFSCORE_LATENCY_18C; + } break; - case INS_sve_ld4d: + + case INS_fcvtas: + case INS_fcvtau: + case INS_fcvtms: + case INS_fcvtmu: + case INS_fcvtns: + case INS_fcvtnu: + case INS_fcvtps: + case INS_fcvtpu: + case INS_fcvtzs: + case INS_fcvtzu: + case INS_frinta: + case INS_frinti: + case INS_frintm: + case INS_frintn: + case INS_frintp: + case INS_frintx: + case INS_frintz: + case INS_scvtf: + case INS_ucvtf: result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + result.insLatency = PERFSCORE_LATENCY_4C; break; - } - break; - case 
IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked - // scaled offsets) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; - break; - - case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) - switch (ins) - { - case INS_sve_ld1q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_fcmle: + case INS_fcmlt: + case INS_frecpe: + case INS_frsqrte: + case INS_urecpe: + case INS_ursqrte: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - } - break; - - case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus - // scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_10C; - break; - case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) - switch (ins) - { - case INS_sve_st1q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_fcvtl: + case INS_fcvtl2: + case INS_fcvtn: + case INS_fcvtn2: + case INS_fcvtxn: + case INS_fcvtxn2: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -21708,94 +15522,79 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus - // scalar) - case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus - // scalar) - case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus - // scalar) - case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus - // scalar) - case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + case IF_DV_2G: // fmov, fabs, fneg, fsqrt, fcmXX, fcvtXX, frintX, scvtf, ucvtf (scalar) switch (ins) { - case INS_sve_st2b: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - case INS_sve_st3b: + case INS_fmov: + // FP move, vector register result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4b: - result.insThroughput = PERFSCORE_THROUGHPUT_9X; - result.insLatency = PERFSCORE_LATENCY_11C; - break; - case INS_sve_st2h: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_st3h: + + case INS_fabs: + case INS_fneg: + + case INS_fcvtas: + case INS_fcvtau: + case INS_fcvtms: + case INS_fcvtmu: + case INS_fcvtns: + case INS_fcvtnu: + case INS_fcvtps: + case INS_fcvtpu: + case INS_fcvtzs: + case INS_fcvtzu: + case INS_scvtf: + case INS_ucvtf: + + case 
INS_frinta: + case INS_frinti: + case INS_frintm: + case INS_frintn: + case INS_frintp: + case INS_frintx: + case INS_frintz: result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4h: - result.insThroughput = PERFSCORE_THROUGHPUT_9X; - result.insLatency = PERFSCORE_LATENCY_11C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_st2w: + + case INS_fcvtxn: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_4C; break; - case INS_sve_st3w: + + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_fcmle: + case INS_fcmlt: result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4w: - result.insThroughput = PERFSCORE_THROUGHPUT_9X; - result.insLatency = PERFSCORE_LATENCY_11C; - break; - case INS_sve_st2d: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_st3d: + + case INS_frecpe: + case INS_frecpx: + case INS_frsqrte: result.insThroughput = PERFSCORE_THROUGHPUT_2X; - result.insLatency = PERFSCORE_LATENCY_7C; - break; - case INS_sve_st4d: - result.insThroughput = PERFSCORE_THROUGHPUT_9X; - result.insLatency = PERFSCORE_LATENCY_11C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + result.insLatency = PERFSCORE_LATENCY_4C; break; - } - break; - case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus - // scalar) - switch (ins) - { - case INS_sve_st2q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_st3q: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_st4q: - result.insThroughput = 
PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_fsqrt: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_19C; + result.insLatency = PERFSCORE_LATENCY_22C; + } + else + { + // S-form + assert(id->idOpSize() == EA_4BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_12C; + } break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -21803,73 +15602,90 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled - // offsets) - case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled - // offsets) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_2Q: // faddp, fmaxnmp, fmaxp, fminnmp, fminp (scalar) + case IF_DV_2R: // fmaxnmv, fmaxv, fminnmv, fminv + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) - case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case IF_DV_2S: // addp (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_3C; break; - case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) - case IF_SVE_GT_4A: // ........xx.mmmmm 
.rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) - case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_5C; - break; - - case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate - // (predicated) + case IF_DV_3B: // fadd, fsub, fdiv, fmul, fmulx, fmla, fmls, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX + // faddp, fmaxnmp, fmaxp, fminnmp, fminp, addp (vector) switch (ins) { - case INS_sve_fmul: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_3C; + case INS_fmin: + case INS_fminnm: + case INS_fmax: + case INS_fmaxnm: + case INS_fabd: + case INS_fadd: + case INS_fsub: + case INS_fmul: + case INS_fmulx: + case INS_fmla: + case INS_fmls: + case INS_frecps: + case INS_frsqrts: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - default: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + case INS_faddp: + case INS_fmaxnmp: + case INS_fmaxp: + case INS_fminnmp: + case INS_fminp: + if (id->idOpSize() == EA_16BYTE) + { + // Q-form + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; - } - break; - - case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; - - case IF_SVE_HP_3A: // .............xx. 
...gggnnnnnddddd -- SVE floating-point convert to integer - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - switch (ins) - { - case INS_sve_bfmla: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_facge: + case INS_facgt: + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + case INS_fcmle: + case INS_fcmlt: + if (id->idOpSize() == EA_16BYTE) + { + // Q-form + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + } break; - case INS_sve_bfmls: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_fdiv: + if ((id->idInsOpt() == INS_OPTS_2S) || (id->idInsOpt() == INS_OPTS_4S)) + { + // S-form + result.insThroughput = PERFSCORE_THROUGHPUT_10C; + result.insLatency = PERFSCORE_LATENCY_13C; + } + else + { + // D-form + assert(id->idInsOpt() == INS_OPTS_2D); + result.insThroughput = PERFSCORE_THROUGHPUT_10C; + result.insLatency = PERFSCORE_LATENCY_22C; + } break; default: @@ -21879,75 +15695,91 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing - // multiplicand - case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case IF_DV_3AI: // mul, mla, mls (vector by element) + case IF_DV_3BI: // fmul, fmulx, fmla, fmls (vector by element) + case IF_DV_3EI: // sqdmlal, sqdmlsl, sqdmulh, sqdmull (scalar by element) + result.insThroughput = 
PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register - case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - - case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register - case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; + case IF_DV_4A: // fmadd, fmsub, fnmadd, fnsub (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; + case IF_DV_3D: // fadd, fsub, fdiv, fmul, fmulx, fmin, fminnm, fmax, fmaxnm, fabd, fcmXX (scalar) + switch (ins) + { + case INS_fadd: + case INS_fsub: + case INS_fabd: + case INS_fmax: + case INS_fmaxnm: + case INS_fmin: + case INS_fminnm: + case INS_fmul: + case INS_fmulx: + case INS_fnmul: + case INS_frecps: + case INS_frsqrts: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; + break; - case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; + case INS_facge: + case INS_facgt: + case INS_fcmeq: + case INS_fcmge: + case INS_fcmgt: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; - case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- 
SVE2 lookup table with 4-bit indices and 16-bit - // element size - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; + case INS_fdiv: + if (id->idOpSize() == EA_8BYTE) + { + // D-form + result.insThroughput = PERFSCORE_THROUGHPUT_6C; + result.insLatency = PERFSCORE_LATENCY_15C; + } + else + { + // S-form + assert(id->idOpSize() == EA_4BYTE); + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + } + break; - case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit - // element size - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } break; - case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit - // element size - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case IF_DV_2H: // fmov, fcvtXX - to general + // fmov : FP transfer to general register + // fcvtaXX : FP convert from vector to general + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) + case IF_DV_2I: // fmov, Xcvtf - from general switch (ins) { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = 
PERFSCORE_LATENCY_1C; // need to fix + case INS_fmov: + // FP transfer from general register + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_scvtf: + case INS_ucvtf: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_5C; break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -21955,131 +15787,260 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit - // scaled offsets) + case IF_DV_3C: // mov,and, bic, eor, mov,mvn, orn, bsl, bit, bif, + // tbl, tbx (vector) switch (ins) { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbl: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbl_2regs: + result.insThroughput = PERFSCORE_THROUGHPUT_3X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbl_3regs: + result.insThroughput = PERFSCORE_THROUGHPUT_4X; + result.insLatency = PERFSCORE_LATENCY_3C; break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + case INS_tbl_4regs: + result.insThroughput = 
PERFSCORE_THROUGHPUT_3X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - } - break; - - case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled - // offsets) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbx: + result.insThroughput = PERFSCORE_THROUGHPUT_3X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbx_2regs: + result.insThroughput = PERFSCORE_THROUGHPUT_4X; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbx_3regs: + result.insThroughput = PERFSCORE_THROUGHPUT_5X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_tbx_4regs: + result.insThroughput = PERFSCORE_THROUGHPUT_6X; + result.insLatency = PERFSCORE_LATENCY_5C; break; default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + // All other instructions + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; } break; - case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) - switch (ins) + case IF_DV_2E: // mov, dup (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_2F: // mov, ins (element) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_2B: // smov, umov - to general) + 
result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_2C: // mov, dup, ins - from general) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + if (ins == INS_dup) { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; + result.insLatency = PERFSCORE_LATENCY_3C; + } + else + { + assert((ins == INS_ins) || (ins == INS_mov)); + result.insLatency = PERFSCORE_LATENCY_2C; } break; - case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + case IF_DV_2D: // dup (dvector) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_3A: // (vector) + // add, sub, mul, mla, mls, cmeq, cmge, cmgt, cmhi, cmhs, ctst, + // pmul, saba, uaba, sabd, uabd, umin, uminp, umax, umaxp, smin, sminp, smax, smaxp switch (ins) { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_add: + case INS_sub: + case INS_cmeq: + case INS_cmge: + case INS_cmgt: + case INS_cmhi: + case INS_cmhs: + case INS_shadd: + case INS_shsub: + case INS_srhadd: + case INS_srshl: + case INS_sshl: + case INS_smax: + case INS_smaxp: + case INS_smin: + case INS_sminp: + case INS_umax: + case 
INS_umaxp: + case INS_umin: + case INS_uminp: + case INS_uhadd: + case INS_uhsub: + case INS_urhadd: + case INS_urshl: + case INS_ushl: + case INS_uzp1: + case INS_uzp2: + case INS_zip1: + case INS_zip2: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_trn1: + case INS_trn2: + if (id->idInsOpt() == INS_OPTS_2D) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + } + + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_addp: + case INS_cmtst: + case INS_pmul: + case INS_sabd: + case INS_sqadd: + case INS_sqsub: + case INS_uabd: + case INS_uqadd: + case INS_uqsub: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_mla: + case INS_mls: + case INS_mul: + case INS_sqdmulh: + case INS_sqrdmulh: + case INS_sqrshl: + case INS_sqshl: + case INS_uqrshl: + case INS_uqshl: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + + case INS_saba: + case INS_uaba: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - } - break; - case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) - switch (ins) - { - case INS_sve_prfb: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = 
PERFSCORE_LATENCY_1C; // need to fix + case INS_sdot: + case INS_udot: + result.insLatency = PERFSCORE_LATENCY_4C; + if (id->idOpSize() == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + } + break; + + case INS_addhn: + case INS_addhn2: + case INS_sabdl: + case INS_sabdl2: + case INS_saddl2: + case INS_saddl: + case INS_saddw: + case INS_saddw2: + case INS_ssubl: + case INS_ssubl2: + case INS_ssubw: + case INS_ssubw2: + case INS_subhn: + case INS_subhn2: + case INS_uabdl: + case INS_uabdl2: + case INS_uaddl: + case INS_uaddl2: + case INS_uaddw: + case INS_uaddw2: + case INS_usubl: + case INS_usubl2: + case INS_usubw: + case INS_usubw2: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_prfh: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_raddhn: + case INS_raddhn2: + case INS_rsubhn: + case INS_rsubhn2: + case INS_sabal: + case INS_sabal2: + case INS_uabal: + case INS_uabal2: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case INS_sve_prfw: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_smlal: + case INS_smlal2: + case INS_smlsl: + case INS_smlsl2: + case INS_smull: + case INS_smull2: + case INS_sqdmlal: + case INS_sqdmlal2: + case INS_sqdmlsl: + case INS_sqdmlsl2: + case INS_sqdmull: + case INS_sqdmull2: + case INS_sqrdmlah: + case INS_sqrdmlsh: + case INS_umlal: + case INS_umlal2: + case INS_umlsl: + case INS_umlsl2: + case INS_umull: + case INS_umull2: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case INS_sve_prfd: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // 
need to fix + + case INS_pmull: + case INS_pmull2: + if ((id->idInsOpt() == INS_OPTS_8B) || (id->idInsOpt() == INS_OPTS_16B)) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } + else + { + // Crypto polynomial (64x64) multiply long + assert((id->idInsOpt() == INS_OPTS_1D) || (id->idInsOpt() == INS_OPTS_2D)); + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + } break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -22087,67 +16048,99 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) - case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + case IF_DV_3DI: // fmul, fmulx, fmla, fmls (scalar by element) result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_9C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) - case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) - result.insThroughput = PERFSCORE_THROUGHPUT_1C; + case IF_DV_3E: // add, sub, cmeq, cmge, cmgt, cmhi, cmhs, ctst, (scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element - case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load 
and broadcast element - result.insThroughput = PERFSCORE_THROUGHPUT_3C; - result.insLatency = PERFSCORE_LATENCY_6C; - break; - - case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case IF_DV_3G: // ext + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_2C; break; - case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts + case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar) + case IF_DV_2M: // (vector) + // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt, + // addv, saddlv, uaddlv, smaxv, sminv, umaxv, uminv + // cls, clz, cnt, rbit, rev16, rev32, rev64, + // xtn, xtn2, shll, shll2 switch (ins) { - case INS_sve_f1cvt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_f2cvt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; - case INS_sve_bf1cvt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + case INS_abs: + case INS_sqneg: + case INS_suqadd: + case INS_usqadd: + if (id->idOpSize() == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + } + + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_bf2cvt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_addv: + case INS_saddlv: + case INS_uaddlv: + case INS_cls: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; break; - case INS_sve_f1cvtlt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case 
INS_sminv: + case INS_smaxv: + case INS_uminv: + case INS_umaxv: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; break; - case INS_sve_f2cvtlt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_cmeq: + case INS_cmge: + case INS_cmgt: + case INS_cmle: + case INS_cmlt: + + case INS_clz: + case INS_cnt: + case INS_rbit: + case INS_rev16: + case INS_rev32: + case INS_rev64: + case INS_xtn: + case INS_xtn2: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_bf1cvtlt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_mvn: + case INS_not: + case INS_neg: + case INS_shll: + case INS_shll2: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case INS_sve_bf2cvtlt: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + + case INS_sqabs: + case INS_sqxtn: + case INS_sqxtn2: + case INS_sqxtun: + case INS_sqxtun2: + case INS_uqxtn: + case INS_uqxtn2: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -22155,22 +16148,89 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - - case IF_SVE_CB_2A: // ........xx...... 
......nnnnnddddd -- SVE broadcast general register + case IF_DV_2N: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate - + // scalar) + case IF_DV_2O: // sshr, ssra, srshr, srsra, shl, ushr, usra, urshr, ursra, sri, sli (shift by immediate - + // vector) + // sshll, sshll2, ushll, ushll2, shrn, shrn2, rshrn, rshrn2, sxrl, sxl2, uxtl, uxtl2 switch (ins) { - case INS_sve_mov: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case INS_shl: + case INS_shrn: + case INS_shrn2: + case INS_sli: + case INS_sri: + case INS_sshr: + case INS_ushr: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_2C; break; - case INS_sve_dup: + + case INS_shll: + case INS_shll2: + case INS_sshll: + case INS_sshll2: + case INS_ushll: + case INS_ushll2: + case INS_sxtl: + case INS_sxtl2: + case INS_uxtl: + case INS_uxtl2: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case INS_rshrn: + case INS_rshrn2: + case INS_srshr: + case INS_sqshrn: + case INS_sqshrn2: + case INS_ssra: + case INS_urshr: + case INS_uqshrn: + case INS_uqshrn2: + case INS_usra: + if (id->idOpSize() == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + } + else + { + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; + } + break; + + case INS_srsra: + case INS_ursra: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case INS_sqrshrn: + case INS_sqrshrn2: + case INS_sqrshrun: + case INS_sqrshrun2: + case INS_sqshrun: + case INS_sqshrun2: + case INS_sqshl: + case INS_sqshlu: + case INS_uqrshrn: + case INS_uqrshrn2: + case INS_uqshl: + if (id->idOpSize() == EA_16BYTE) + { + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + } + else + { + 
result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_4C; + } break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -22178,13 +16238,34 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements + case IF_DV_2P: // aese, aesd, aesmc, aesimc, sha1su1, sha256su0 + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_3F: // sha1c, sha1m, sha1p, sha1su0, sha256h, sha256h2, sha256su1 (vector) switch (ins) { - case INS_sve_rev: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; + case INS_sha1su0: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_2C; break; + + case INS_sha256su0: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case INS_sha1c: + case INS_sha1m: + case INS_sha1p: + case INS_sha256h: + case INS_sha256h2: + case INS_sha256su1: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); @@ -22192,40 +16273,58 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; - case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - - case IF_SVE_HF_2A: // ........xx...... 
......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) + case IF_SI_0A: // brk imm16 result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) - case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert + case IF_SR_1A: result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_2C; + result.insLatency = PERFSCORE_LATENCY_1C; break; - case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_4C; - break; + case IF_DV_2T: // addv, saddlv, smaxv, sminv, uaddlv, umaxv, uminv + switch (ins) + { + case INS_addv: + case INS_saddlv: + case INS_uaddlv: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; - case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix - break; + case INS_smaxv: + case INS_sminv: + case INS_umaxv: + case INS_uminv: + case INS_sha256h2: + case INS_sha256su1: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case INS_sadalp: + case INS_uadalp: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case INS_saddlp: + case INS_uaddlp: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_3C; + break; - case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq - result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix - result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + default: + // all 
other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } break; default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); + // fallback to SVE instructions + getInsSveExecutionCharacteristics(id, result); break; } diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 3eb45289dd772f..62624fe50d68ef 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -33,8 +33,17 @@ const char* emitSveRegName(regNumber reg) const; const char* emitVectorRegName(regNumber reg); const char* emitPredicateRegName(regNumber reg, PredicateType ptype); +#ifdef DEBUG +void emitInsSveSanityCheck(instrDesc* id); +#endif // DEBUG + +#if defined(DEBUG) || defined(LATE_DISASM) +void getInsSveExecutionCharacteristics(instrDesc* id, insExecutionCharacteristics& result); +#endif // defined(DEBUG) || defined(LATE_DISASM) + void emitDispInsHelp( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig); +void emitDispInsSveHelp(instrDesc* id); void emitDispLargeJmp( instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig); void emitDispComma(); diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 68826a1bfe5594..8a0894426cfb53 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -12522,4 +12522,5879 @@ void emitter::emitDispSvePrfop(insSvePrfop prfop, bool addComma) return; } +#ifdef DEBUG +/***************************************************************************** + * + * The following is called for each recorded SVE instruction -- use for debugging. + */ +void emitter::emitInsSveSanityCheck(instrDesc* id) +{ + switch (id->idInsFmt()) + { + ssize_t imm; + + case IF_SVE_CK_2A: // ................ 
.......NNNN.DDDD -- SVE unpack predicate elements + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // NNNN + break; + + // Scalable. + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, .S or .D. + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_CL_3A: // ........xx...... 
...gggnnnnnddddd -- SVE compress active elements + assert(insOptsScalableWords(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, Merge or Zero predicate. + case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // nnnnn + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // ddddd + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, with shift immediate. + case IF_SVE_AM_2A: // ........xx...... ...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isValidVectorShiftAmount(emitGetInsSC(id), optGetSveElemsize(id->idInsOpt()), true)); + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable Wide. + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable to/from SIMD scalar. + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CP_3A: // ........xx...... 
...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector + // (predicated) + case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(id->idOpSize())); + break; + + // Scalable to FP SIMD scalar. + case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsizeSveFloat(id->idOpSize())); + break; + + // Scalable to general register. + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_CS_3A: // ........xx...... 
...gggnnnnnddddd -- SVE extract element to general register + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(id->idOpSize())); + break; + + // Scalable, 4 regs (location of reg3 and reg4 can switch) + case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend + // (predicated) + case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand + // (predicated) + case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector) + case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, unpredicated + case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE 
table lookup (three sources) + case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads + case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) + case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long + case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high + case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long + case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long + case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp + case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long + case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part + case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) + case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); // xx 
+ assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, no predicates. General purpose source registers + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(id->idOpSize())); + break; + + case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation + assert(id->idInsOpt() == INS_OPTS_SCALABLE_S || id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<2>(emitGetInsSC(id))); // hh + break; + + case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D_SXTW || id->idInsOpt() == INS_OPTS_SCALABLE_D_UXTW); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<2>(emitGetInsSC(id))); // hh + break; + + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + assert(id->idInsOpt() == INS_OPTS_NONE); + assert(isGeneralRegister(id->idReg1())); + assert(id->idOpSize() == EA_8BYTE); + assert(isValidUimmFrom1<4>(emitGetInsSC(id))); + break; + + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by 
element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isValidUimmFrom1<4>(emitGetInsSC(id))); + break; + + case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) + case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidImmNRS(imm, optGetSveElemsize(id->idInsOpt()))); + break; + + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + assert(id->idInsOpt() == INS_OPTS_NONE); + assert(isGeneralRegister(id->idReg1())); + assert(isValidGeneralDatasize(id->idOpSize())); + assert(isValidUimmFrom1<4>(emitGetInsSC(id))); + break; + + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm<8>(emitGetInsSC(id))); // iiiii iii + break; + + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + { + imm = emitGetInsSC(id); + floatImm8 fpImm; + fpImm.immFPIVal = (unsigned)imm; + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm<8>((ssize_t)emitDecodeFloatImm8(fpImm))); // iiiiiiii + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + } + + 
case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + assert(isValidSimm<8>(imm)); // iiiiiiii + break; + + case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // gggg + break; + + case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isVectorRegister(id->idReg2())); // nnnnn + break; + + case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm<3>(emitGetInsSC(id))); + break; + + case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm<1>(emitGetInsSC(id))); // i + break; + + case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidUimm<3>(emitGetInsSC(id))); // ii + break; + + case IF_SVE_CF_2A: // ................ 
.......NNNNddddd -- SVE move predicate into vector + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // NNNN + break; + + case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // NNNN + assert(isValidUimm<3>(emitGetInsSC(id))); + break; + + case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // NNNN + assert(isValidUimm<1>(emitGetInsSC(id))); // i + break; + + case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // NNNN + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + break; + + case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // mmmmm + break; + + case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isGeneralRegisterOrZR(id->idReg2())); // mmmmm + break; + + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // NNNN + assert(isPredicateRegister(id->idReg3())); // MMMM + break; + + case IF_SVE_CJ_2A: // ........xx...... 
.......NNNN.DDDD -- SVE reverse predicate elements + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // NNNN + break; + + case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + // Scalable, 4 regs, to predicate register. + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isValidSimm<5>(emitGetInsSC(id))); // iiiii + break; + + case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate + assert(isScalableVectorSize(id->idOpSize())); + 
assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isValidUimm<7>(emitGetInsSC(id))); // iiiii + break; + + case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate + case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product + case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product + case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations + case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long + case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long + case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long + case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE 
floating-point arithmetic (unpredicated) + case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm + assert(isVectorRegister(id->idReg3())); // mmmmm/aaaaa + break; + + case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + assert(insOptsNone(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn/aaaaa + assert(isVectorRegister(id->idReg3())); // mmmmm + break; + + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + 
assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + break; + + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); + assert(isValidUimm<3>(emitGetInsSC(id))); // iii + break; + + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3B: // 
...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmmm + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + break; + + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmmm + assert(isValidUimm<1>(emitGetInsSC(id))); // i + break; + + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isPredicateRegister(id->idReg3())); // NNNN + + switch (id->idIns()) + { + case INS_sve_and: + case INS_sve_ands: + case INS_sve_bic: + case INS_sve_bics: + case INS_sve_eor: + case INS_sve_eors: + case INS_sve_nand: + case INS_sve_nands: + 
case INS_sve_nor: + case INS_sve_nors: + case INS_sve_orn: + case INS_sve_orns: + case INS_sve_orr: + case INS_sve_orrs: + case INS_sve_sel: + assert(isPredicateRegister(id->idReg4())); // MMMM + break; + + case INS_sve_mov: + case INS_sve_movs: + case INS_sve_not: + case INS_sve_nots: + // no fourth register + break; + + default: + unreached(); + break; + } + break; + + case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // NNNN + break; + + case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition + case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition + case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition + assert(isScalableVectorSize(id->idOpSize())); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isPredicateRegister(id->idReg3())); // NNNN + break; + + case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition + assert(isScalableVectorSize(id->idOpSize())); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isPredicateRegister(id->idReg3())); // NNNN + assert(isPredicateRegister(id->idReg4())); // MMMM + break; + + case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active + case IF_SVE_DG_2A: // ................ 
.......gggg.DDDD -- SVE predicate read from FFR (predicated) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // gggg + break; + + case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize + assert(isScalableVectorSize(id->idOpSize())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(insOptsScalableStandard(id->idInsOpt())); // xx + break; + + case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active + case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // gggg + break; + + case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) + case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero + assert(isScalableVectorSize(id->idOpSize())); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // DDDD + break; + + case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count + assert(id->idOpSize() == EA_8BYTE); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // gggg + assert(isPredicateRegister(id->idReg3())); // NNNN + break; + + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableAtMaxHalf(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_GQ_3A: // ................ 
...gggnnnnnddddd -- SVE floating-point convert precision odd elements + switch (id->idIns()) + { + case INS_sve_fcvtnt: + case INS_sve_fcvtlt: + assert(insOptsConvertFloatStepwise(id->idInsOpt())); + FALLTHROUGH; + case INS_sve_fcvtxnt: + case INS_sve_bfcvtnt: + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + default: + assert(!"unreachable"); + break; + } + break; + + case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision + assert(id->idInsOpt() == INS_OPTS_S_TO_H); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_HO_3B: + assert(insOptsConvertFloatToFloat(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_HO_3C: + assert(id->idInsOpt() == INS_OPTS_D_TO_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer + assert(insOptsScalableFloat(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_H_TO_S || + id->idInsOpt() == INS_OPTS_H_TO_D || id->idInsOpt() == INS_OPTS_S_TO_D || + id->idInsOpt() == INS_OPTS_D_TO_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_HS_3A: // ................ 
...gggnnnnnddddd -- SVE integer convert to floating-point + assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || id->idInsOpt() == INS_OPTS_S_TO_H || + id->idInsOpt() == INS_OPTS_S_TO_D || id->idInsOpt() == INS_OPTS_D_TO_H || + id->idInsOpt() == INS_OPTS_D_TO_S); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + assert(isScalableVectorSize(id->idOpSize())); + assert(insOptsScalableFloat(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + // Scalable FP. + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable to Simd Vector. + case IF_SVE_AG_3A: // ........xx...... 
...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(id->idOpSize() == EA_8BYTE); + break; + + // Scalable FP to Simd Vector. + case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(id->idOpSize() == EA_8BYTE); + break; + + // Scalable, widening to scalar SIMD. + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsizeWidening(id->idOpSize())); + break; + + // Scalable, possibly FP. + case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + switch (id->idIns()) + { + case INS_sve_fabs: + case INS_sve_fneg: + assert(insOptsScalableFloat(id->idInsOpt())); // xx + break; + + default: + assert(insOptsScalableStandard(id->idInsOpt())); // xx + break; + } + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, various sizes. + case IF_SVE_AQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE integer unary operations (predicated) + case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements + switch (id->idIns()) + { + case INS_sve_abs: + case INS_sve_neg: + case INS_sve_rbit: + assert(insOptsScalableStandard(id->idInsOpt())); + break; + + case INS_sve_sxtb: + case INS_sve_uxtb: + case INS_sve_revb: + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + break; + + case INS_sve_sxth: + case INS_sve_uxth: + case INS_sve_revh: + assert(insOptsScalableWords(id->idInsOpt())); + break; + + default: + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + break; + } + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) + case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive) + assert(isScalableVectorSize(id->idOpSize())); // xx + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // VVV + assert(isVectorRegister(id->idReg3())); // nnnnn + break; + + case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) + assert(isScalableVectorSize(id->idOpSize())); // xx + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // VVVV + assert(isVectorRegister(id->idReg3())); // nnnnn + if (id->idIns() == INS_sve_sel) + { + assert(isVectorRegister(id->idReg4())); // mmmmm + } + break; + + // Scalable from general scalar (possibly SP) + case IF_SVE_CQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE copy general register to vector (predicated) + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegisterOrZR(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(id->idOpSize())); + break; + + // Scalable, .H, .S or .D + case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long + case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + // Scalable, possibly fixed to .S + case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) + switch (id->idIns()) + { + case INS_sve_sqabs: + case INS_sve_sqneg: + assert(insOptsScalableStandard(id->idInsOpt())); + break; + + default: + assert(id->idInsOpt() == INS_OPTS_SCALABLE_S); + break; + } + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + assert(isVectorRegister(id->idReg1())); // nnnn + assert(isVectorRegister(id->idReg2())); // ddddd + assert(isEvenRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) + assert(id->idOpSize() == EA_8BYTE); + + FALLTHROUGH; + case IF_SVE_DO_2A: // ........xx...... 
.....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isPredicateRegister(id->idReg2())); // MMMM + assert(isValidGeneralDatasize(id->idOpSize())); + break; + + case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // MMMM + assert(isVectorRegister(id->idReg2())); // ddddd + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise + break; + + case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isPredicateRegister(id->idReg1())); // NNNN + break; + + case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars + assert(insOptsNone(id->idInsOpt())); + assert(isGeneralRegister(id->idReg1())); // nnnnn + assert(isGeneralRegister(id->idReg2())); // mmmmm + assert(isValidGeneralDatasize(id->idOpSize())); // x + break; + + case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow + assert(insOptsNone(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnn + assert(isEvenRegister(id->idReg2())); + break; + + case IF_SVE_HG_2A: // ................ 
......nnnn.ddddd -- SVE2 FP8 downconverts + assert(insOptsNone(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnn + assert(isEvenRegister(id->idReg2())); + break; + + case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // nnnnn + assert(isVectorRegister(id->idReg2())); // ddddd + assert(optGetSveElemsize(id->idInsOpt()) != EA_8BYTE); + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + // x + break; + + case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment + assert(insOptsNone(id->idInsOpt())); + assert(id->idOpSize() == EA_8BYTE); + assert(isGeneralRegisterOrZR(id->idReg1())); // ddddd + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<6>(emitGetInsSC(id))); // iiiiii + break; + + case IF_SVE_BC_1A: // ................ 
.....iiiiiiddddd -- SVE stack frame size + assert(insOptsNone(id->idInsOpt())); + assert(id->idOpSize() == EA_8BYTE); + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isValidSimm<6>(emitGetInsSC(id))); // iiiiii + break; + + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm + { + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx xx + imm = emitGetInsSC(id); + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimmFrom1<3>(imm)); // iii + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimmFrom1<4>(imm)); // xiii + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimmFrom1<5>(imm)); // xxiii + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidUimmFrom1<6>(imm)); // xx xiii + break; + + default: + unreached(); + break; + } + break; + } + + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + { + ssize_t imm1; + ssize_t imm2; + insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm<5>(imm1)); // iiiii + assert(isValidSimm<5>(imm2)); // iiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + } + + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm<5>(emitGetInsSC(id))); // iiiii + assert(isIntegerRegister(id->idReg2())); // mmmmm + 
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long + { + assert(insOptsScalableWide(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx + imm = emitGetInsSC(id); + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm<3>(imm)); // iii + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm<4>(imm)); // x iii + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm<5>(imm)); // xx iii + break; + + default: + unreached(); + break; + } + break; + } + + case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow + { + assert(insOptsScalableWide(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x xx + imm = emitGetInsSC(id); + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimmFrom1<3>(imm)); // iii + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimmFrom1<4>(imm)); // x iii + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimmFrom1<5>(imm)); // xx iii + break; + + default: + unreached(); + break; + } + break; + } + + case IF_SVE_FV_2A: // ........xx...... 
.....rmmmmmddddd -- SVE2 complex integer add + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(emitIsValidEncodedRotationImm90_or_270(emitGetInsSC(id))); // r + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // x + break; + + case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // mmmmm + if (id->idInsOpt() == INS_OPTS_SCALABLE_S) + { + assert(id->idIns() == INS_sve_sm4e); + } + else + { + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + } + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_GL_1A: // ................ 
...........ddddd -- SVE2 crypto unary operations + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + assert(id->idOpSize() == EA_8BYTE); + + FALLTHROUGH; + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isValidGeneralDatasize(id->idOpSize())); // X + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element + { + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isPredicateRegister(id->idReg2())); // NNNN + assert(isPredicateRegister(id->idReg3())); // MMMM + assert(isGeneralRegister(id->idReg4())); // vv + assert((REG_R12 <= id->idReg4()) && (id->idReg4() <= REG_R15)); + imm = emitGetInsSC(id); + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm<4>(imm)); + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm<3>(imm)); + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm<2>(imm)); + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidUimm<1>(imm)); + break; + + default: + unreached(); + break; + } + + break; + } + + case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + assert(isValidUimm<1>(emitGetInsSC(id))); // i + + FALLTHROUGH; + case IF_SVE_DW_2A: // ........xx...... 
......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isHighPredicateRegister(id->idReg2())); // NNN + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate + // pair) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); // DDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + assert(insOptsScalableStandard(id->idInsOpt())); // L + assert(isHighPredicateRegister(id->idReg1())); // DDD + assert(isGeneralRegister(id->idReg2())); // nnnnn + assert(isGeneralRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isHighPredicateRegister(id->idReg1())); // DDD + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidUimm<8>(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EB_1A: // ........xx...... 
..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + assert(isValidSimm<8>(imm)); // iiiiiiii + break; + + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + assert(isValidUimm<8>(imm)); // iiiiiiii + break; + + case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm<8>(emitGetInsSC(id)) || isValidUimm<8>(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EE_1A: // ........xx...... 
...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm<8>(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + assert(insOptsScalableWords(id->idInsOpt())); + + FALLTHROUGH; + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(emitIsValidEncodedRotationImm0_to_270(emitGetInsSC(id))); // rr + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); + assert(isValidUimm<4>(emitGetInsSC(id))); // ii rr + break; + + case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) 
+ assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isLowVectorRegister(id->idReg3())); // mmm + assert(isValidUimm<3>(emitGetInsSC(id))); // i rr + break; + + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, 
scalar plus + // immediate) + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(id->idOpSize())); + + switch (id->idIns()) + { + case INS_sve_ld2b: + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld2q: + case INS_sve_st2b: + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st2q: + assert((isValidSimm_MultipleOf<4, 2>(emitGetInsSC(id)))); // iiii + break; + + case INS_sve_ld3b: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld3q: + case INS_sve_st3b: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st3q: + assert((isValidSimm_MultipleOf<4, 3>(emitGetInsSC(id)))); // iiii + break; + + case INS_sve_ld4b: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + case INS_sve_ld4q: + case INS_sve_st4b: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + case INS_sve_st4q: + assert((isValidSimm_MultipleOf<4, 4>(emitGetInsSC(id)))); // iiii + break; + + case INS_sve_ld1rqb: + case INS_sve_ld1rqd: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + assert((isValidSimm_MultipleOf<4, 
16>(emitGetInsSC(id)))); // iiii + break; + + case INS_sve_ld1rob: + case INS_sve_ld1rod: + case INS_sve_ld1roh: + case INS_sve_ld1row: + assert((isValidSimm_MultipleOf<4, 32>(emitGetInsSC(id)))); // iiii + break; + + default: + assert(isValidSimm<4>(emitGetInsSC(id))); // iiii + break; + } + break; + + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); // xx + // st1h is reserved for scalable B + assert((id->idIns() == INS_sve_st1h) ? insOptsScalableAtLeastHalf(id->idInsOpt()) + : insOptsScalableStandard(id->idInsOpt())); + break; + + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); // x + break; + + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store 
(scalar plus 64-bit + // unscaled offsets) + assert(insOptsScalable32bitExtends(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(id->idOpSize())); // xx + assert(isValidSimm<4>(imm)); // iiii + break; + + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isScalableVectorSize(id->idOpSize())); // x + assert(isValidSimm<4>(imm)); // iiii + break; + + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_C: 
// .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + assert(insOptsScalable32bitExtends(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE 
contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + assert(insOptsScalableWordsOrQuadwords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) 
+ case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus 
scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + assert(insOptsScalableDoubleWordsOrQuadword(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + 
break; + + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + 
assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + imm = emitGetInsSC(id); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(emitIsValidEncodedRotationImm90_or_270(imm)); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + imm = emitGetInsSC(id); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(emitIsValidEncodedRotationImm0_to_270(imm)); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + imm = emitGetInsSC(id); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + assert(emitIsValidEncodedSmallFloatImm(imm)); + break; + + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + imm = emitGetInsSC(id); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isValidUimm<3>(imm)); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register + case IF_SVE_JG_2A: // 
..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register + assert(insOptsNone(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isPredicateRegister(id->idReg1())); // TTTT + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<9>(emitGetInsSC(id))); // iii + break; + + case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register + case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register + assert(insOptsNone(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isGeneralRegisterOrZR(id->idReg2())); // nnnnn + assert(isValidSimm<9>(emitGetInsSC(id))); // iii + break; + + case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + break; + + case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<2>(emitGetInsSC(id))); // ii + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + break; + + case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<3>(emitGetInsSC(id))); // ii + // i 
+ assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + break; + + case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidUimm<1>(emitGetInsSC(id))); // i + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + assert(insOptsScalable32bitExtends(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isLowPredicateRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + assert(insOptsNone(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + 
assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + assert(insOptsNone(id->idInsOpt())); + assert(isLowPredicateRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isValidUimm<5>(emitGetInsSC(id))); + break; + + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + break; + + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + 
assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert((isValidUimm_MultipleOf<5, 8>(emitGetInsSC(id)))); + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + assert(insOptsScalableWords(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) + assert(insOptsNone(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_SVE_HH_2A: // ................ 
......nnnnnddddd -- SVE2 FP8 upconverts + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isGeneralRegisterOrZR(id->idReg2())); // ZR is SP + break; + + case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator + case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements + case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + break; + + case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) + case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert + case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate + imm = emitGetInsSC(id); + assert(isValidVectorShiftAmount(imm, optGetSveElemsize(id->idInsOpt()), + emitInsIsVectorRightShift(id->idIns()))); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + break; + + case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element + imm = emitGetInsSC(id); + assert(insOptsScalable(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + 
assert(isVectorRegister(id->idReg2())); + assert(isValidBroadcastImm(imm, optGetSveElemsize(id->idInsOpt()))); + break; + + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + imm = emitGetInsSC(id); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm<4>(imm)); + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm<3>(imm)); + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm<2>(imm)); + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidUimm<1>(imm)); + break; + + default: + break; + } + break; + + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + imm = emitGetInsSC(id); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isScalableVectorSize(id->idOpSize())); + assert(isValidUimm<4>(imm)); + break; + + default: + printf("unexpected format %s\n", emitIfName(id->idInsFmt())); + assert(!"Unexpected format"); + break; + } +} +#endif // DEBUG + +//-------------------------------------------------------------------- +// emitDispInsSveHelp: Dump the given SVE instruction to jitstdout. +// +// Arguments: +// id - The instruction +// +void emitter::emitDispInsSveHelp(instrDesc* id) +{ + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); + emitAttr size = id->idOpSize(); + + switch (fmt) + { + ssize_t imm; + bitMaskImm bmi; + + // ., /M, ., . + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + // .D, /M, .D, .D + case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + // .H, /M, .H, .H + case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., /, . + case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) + { + PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO; + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // nnnnn + emitDispLowPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // ddddd + break; + } + + // ., /M, ., # + case IF_SVE_AM_2A: // ........xx...... 
...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(emitGetInsSC(id), false); // iiii + break; + + // ., /M, ., .D + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + + // ., /M, ., . + // ., /M, ., . + case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend + // (predicated) + case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand + // (predicated) + case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + // ., /Z, ., . + case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); + break; + + // ., ., . 
+ case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads + case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp + case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) + // ., ., . 
+ case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + // .Q, .Q, .Q + case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments + // .D, .D, .D + case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + // .D, .D, .D + case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + // .B, .B, .B + case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) + case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + // .D, .D, .D + // .S, .S, .S + case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations + // .H, .H, .H + case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn/mmmmm + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm/aaaaa + break; + + // .D, .D, .D + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + // .D, .D, .D + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_D, true); // nnnnn + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + + // .D, .D, .D, .D + case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations + 
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // kkkkk + break; + + // ., #, # + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + { + ssize_t imm1; + ssize_t imm2; + insSveDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(imm1, true); // iiiii + emitDispImm(imm2, false); // iiiii + break; + } + + // ., #, + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + { + const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(emitGetInsSC(id), true); // iiiii + emitDispReg(id->idReg2(), intRegSize, false); // mmmmm + break; + } + + // ., , # + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + { + const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), intRegSize, true); // mmmmm + emitDispImm(emitGetInsSC(id), false); // iiiii + break; + } + + // .H, .B, .B + case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long + case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., ., . + // ., {.}, . 
+ case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + if (id->idIns() == INS_sve_tbl) + { + emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn + } + else + { + assert(id->idIns() == INS_sve_tbx); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + } + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., ., . + // ., {.}, . + case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + if (id->idIns() == INS_sve_tblq) + { + emitDispSveConsecutiveRegList(id->idReg2(), 1, id->idInsOpt(), true); // nnnnn + } + else + { + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + } + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., {., .}, . + case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveConsecutiveRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., , + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), size, true); // nnnnn + emitDispReg(id->idReg3(), size, false); // mmmmm + break; + + // {, {, MUL #}} + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + // {, {, MUL #}} + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + imm = emitGetInsSC(id); + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(imm, false, 
false); // iiii + } + break; + + // .D{, {, MUL #}} + // .H{, {, MUL #}} + // .S{, {, MUL #}} + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(imm, false, false); // iiii + } + break; + + // ., ., # + case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + + FALLTHROUGH; + // ., # + case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + bmi.immNRS = (unsigned)emitGetInsSC(id); + imm = emitDecodeBitMaskImm(bmi, optGetSveElemsize(id->idInsOpt())); + emitDispImm(imm, false); // iiiiiiiiiiiii + break; + + // , {, {, MUL #}} + // {, {, MUL #}} + // {, {, MUL #}} + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + switch (id->idIns()) + { + case INS_sve_sqincb: + case INS_sve_sqdecb: + case INS_sve_sqinch: + case INS_sve_sqdech: + case INS_sve_sqincw: + case INS_sve_sqdecw: + case INS_sve_sqincd: + case INS_sve_sqdecd: + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + + if (size == EA_4BYTE) + { + emitDispReg(id->idReg1(), EA_4BYTE, true); + } + break; + + default: + emitDispReg(id->idReg1(), size, true); // ddddd + break; + } + + imm = emitGetInsSC(id); + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(imm, false, false); // iiii + } + break; + + // .B, {.B, .B }, # + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + 
imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispVectorRegList(id->idReg2(), 2, id->idInsOpt(), true); // nnnnn + emitDispImm(imm, false); // iiiii iii + break; + + // .B, .B, .B, # + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm + emitDispImm(imm, false); // iiiii iii + break; + + // ., /M, # + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(id->idInsFmt()), INS_OPTS_NONE, true); // gggg + emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii + break; + + // ., ., .D + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + + // ., [., .{, }] + case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + printf("["); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id) > 0); + emitDispSveExtendOptsModN(INS_OPTS_LSL, emitGetInsSC(id)); + printf("]"); + break; + + // .D, [.D, .D, SXTW{ }] + case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + printf("["); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + 
emitDispSveExtendOptsModN(INS_OPTS_SXTW, emitGetInsSC(id)); + printf("]"); + break; + + // .D, [.D, .D, UXTW{ }] + case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + printf("["); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveExtendOptsModN(INS_OPTS_UXTW, emitGetInsSC(id)); + printf("]"); + break; + + // ., + case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false); // mmmmm + break; + + // ., + case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), id->idInsOpt() == INS_OPTS_SCALABLE_D ? EA_8BYTE : EA_4BYTE, false); // mmmmm + break; + + // .H, .B + case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN + break; + + // ., , ., . + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // , , , . + // , , , . + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // , , . + // , , . + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register + case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register + case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., , . + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) + emitDispVectorReg(id->idReg1(), optSveToQuadwordElemsizeArrangement(id->idInsOpt()), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + //
, , . + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., /M, . + case IF_SVE_AP_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + case IF_SVE_AQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer unary operations (predicated) + case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements + case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) + case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value + case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, true); // DDDD + emitDispSveReg(id->idReg2(), false); // nnnnn + break; + case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, true); // DDDD + emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn + break; + case IF_SVE_CE_2C: // ..............i. 
......nnnnn.DDDD -- SVE move predicate from vector + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, true); // DDDD + emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn + break; + case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, true); // DDDD + emitDispSveRegIndex(id->idReg2(), emitGetInsSC(id), false); // nnnnn + break; + case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector + emitDispSveReg(id->idReg1(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_B, false); // NNNN + break; + case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector + emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_D, false); // NNNN + break; + case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector + emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_H, false); // NNNN + break; + case IF_SVE_CF_2D: // .............ii. .......NNNNddddd -- SVE move predicate into vector + emitDispSveRegIndex(id->idReg1(), emitGetInsSC(id), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), INS_OPTS_SCALABLE_S, false); // NNNN + break; + + // ., ., . + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // NNNN + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM + break; + + // ., , . 
+ case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // ., /M, + case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector + // (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispReg(id->idReg3(), size, false); // mmmmm + break; + + // ., /M, + case IF_SVE_CQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy general register to vector (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispReg(encodingZRtoSP(id->idReg3()), size, false); // mmmmm + break; + + // .Q, /M, .Q + case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_Q, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_Q, false); // nnnnn + break; + + // ., , {., .} + case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV + emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn + break; + + // ., , ., . + case IF_SVE_CV_3B: // ........xx...... 
...VVVmmmmmddddd -- SVE vector splice (destructive) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // MOV ., /M, . or SEL ., , ., . + case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) + { + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + + if (id->idIns() == INS_sve_mov) + { + emitDispPredicateReg(id->idReg2(), PREDICATE_MERGE, id->idInsOpt(), true); // VVVV + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // nnnnn + } + else + { + emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // VVVV + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm + } + break; + } + + // ., /Z, ., . + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm + break; + + // ., /Z, ., .D + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg4(), 
INS_OPTS_SCALABLE_D, false); // mmmmm + break; + + // ., /Z, ., # + case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate + case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispImm(emitGetInsSC(id), false, (fmt == IF_SVE_CY_3B)); // iiiii + break; + + // .S, .H, .H[] + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) + // .S, .B, .B[] + case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + // .S, .H, .H[] + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + // .S, .S, .S[] + case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) 
+ case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // ii/iii + break; + + // .S, .H, .H + case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product + case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long + case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product + case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // .S, .B, .B + case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd + emitDispSveReg(id->idReg2(), INS_OPTS_SCALABLE_B, true); // nnnnn + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_B, false); // mmmmm + break; + + // .D, .S, .S[] + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + // .D, .S, .S[] + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + 
emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmm + emitDispElementIndex(emitGetInsSC(id), false); // ii + break; + + // .D, .H, .H[] + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // ii + break; + + // .H, .B, .B[] + case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // iii + break; + + // .H, .H, .H[] + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + // .S, .S, .S[] + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + // .D, .D, .D[] + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + // .D, .D, .D[] + case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high 
(indexed) + // .H, .H, .H[] + case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // i/ii/iii + break; + + // .B, /Z, .B, .B + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + { + bool isFourReg = + !((ins == INS_sve_mov) || (ins == INS_sve_movs) || (ins == INS_sve_not) || (ins == INS_sve_nots)); + PredicateType ptype = (ins == INS_sve_sel) ? PREDICATE_NONE : insGetPredicateType(fmt, 2); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), isFourReg); // NNNN + + if (isFourReg) + { + emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM + } + + break; + } + + // .B, .B + case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN + break; + + // .B, /M, .B + case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + emitDispPredicateReg(id->idReg1(), 
insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN + break; + + // .B, /Z, .B, .B + case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition + { + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true); // NNNN + emitDispPredicateReg(id->idReg4(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM + break; + } + + // .B, /, .B + case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition + case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition + { + PredicateType ptype = (id->idPredicateReg2Merge()) ? PREDICATE_MERGE : PREDICATE_ZERO; + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), ptype, id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN + break; + } + + // .B, /Z, .B, .B + case IF_SVE_DC_3A: // ................ ..gggg.NNNN.MMMM -- SVE propagate break to next partition + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), true); // NNNN + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 4), id->idInsOpt(), false); // MMMM + break; + + // .B, , .B + case IF_SVE_DD_2A: // ................ 
.......gggg.DDDD -- SVE predicate first active + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD + break; + + // .{, } + case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize + { + bool dispPattern = (id->idSvePattern() != SVE_PATTERN_ALL); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), dispPattern); // DDDD + if (dispPattern) + { + emitDispSvePattern(id->idSvePattern(), false); // ppppp + } + break; + } + + // ., . + case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // NNNN + break; + + // ., , . + case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // VVVV + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD + break; + + // .B, /Z + case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated) + case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // gggg + break; + + // .B + case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) + case IF_SVE_DJ_1A: // ................ 
............DDDD -- SVE predicate zero + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDDD + break; + + // , , . + case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // NNNN + break; + + // ., /M, . + case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), (insOpts)((unsigned)id->idInsOpt() - 1), false); // mmmmm + break; + + // .H, { .S-.S }, # + case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, true); // nnnn + emitDispImm(emitGetInsSC(id), false); // iiii + break; + + // , ., + case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // NNNN + emitDispVectorLengthSpecifier(id); + break; + + // , . + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM + break; + + // ., . + case IF_SVE_DN_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec vector by predicate count + case IF_SVE_DP_2A: // ........xx...... 
.......MMMMddddd -- SVE saturating inc/dec vector by predicate count + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM + break; + + // , ., + // , . + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + if ((ins == INS_sve_sqdecp) || (ins == INS_sve_sqincp)) + { + // 32-bit result: , ., + // 64-bit result: , . + const bool is32BitResult = (id->idOpSize() == EA_4BYTE); // X + emitDispReg(id->idReg1(), EA_8BYTE, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), is32BitResult); // MMMM + + if (is32BitResult) + { + emitDispReg(id->idReg1(), EA_4BYTE, false); + } + } + else + { + assert((ins == INS_sve_uqdecp) || (ins == INS_sve_uqincp)); + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), false); // MMMM + } + break; + + // none + case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise + break; + + // .B + case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // NNNN + break; + + // , + case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars + emitDispReg(id->idReg1(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg2(), id->idOpSize(), false); // mmmmm + break; + + // .H, {.S-.S } + case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow + { + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_H, true); + emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_S, false); + break; + } + + // .B, {.H-.H } + case IF_SVE_HG_2A: // ................ 
......nnnn.ddddd -- SVE2 FP8 downconverts + { + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_B, true); + emitDispSveConsecutiveRegList(id->idReg2(), 2, INS_OPTS_SCALABLE_H, false); + break; + } + + // ., . + case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), optWidenSveElemsizeArrangement(id->idInsOpt()), false); // nnnnn + break; + + // , , # + case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment + { + const regNumber reg1 = (id->idReg1() == REG_ZR) ? REG_SP : id->idReg1(); + const regNumber reg2 = (id->idReg2() == REG_ZR) ? REG_SP : id->idReg2(); + emitDispReg(reg1, id->idOpSize(), true); // ddddd + emitDispReg(reg2, id->idOpSize(), true); // nnnnn + emitDispImm(emitGetInsSC(id), false); // iiiiii + break; + } + + // , # + case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size + emitDispReg(id->idReg1(), id->idOpSize(), true); // ddddd + emitDispImm(emitGetInsSC(id), false); // iiiiii + break; + + // ., ., # + case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long + { + const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); + emitDispSveReg(id->idReg1(), largeSizeSpecifier, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispImm(emitGetInsSC(id), false); // iii + break; + } + + // ., ., # + case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow + { + const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), largeSizeSpecifier, true); // nnnnn + emitDispImm(emitGetInsSC(id), false); // iii + break; + } + + // ., ., ., + case IF_SVE_FV_2A: // ........xx...... 
.....rmmmmmddddd -- SVE2 complex integer add + { + // Rotation bit implies rotation is 270 if set, else rotation is 90 + const ssize_t rot = emitDecodeRotationImm90_or_270(emitGetInsSC(id)); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // mmmmm + emitDispImm(rot, false); // r + break; + } + + // ., ., . + case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // .B, .B, .B + // .S, .S, .S + case IF_SVE_GK_2A: // ................ ......mmmmmddddd -- SVE2 crypto destructive binary operations + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // mmmmm + break; + + // .B, .B + case IF_SVE_GL_1A: // ................ 
...........ddddd -- SVE2 crypto unary operations + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), false); // ddddd + break; + + // ., , + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + // ., , + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDDD + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm + break; + + // , , .[, ] + case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // NNNN + emitDispPredicateReg(id->idReg3(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // MMMM + printf("["); + emitDispReg(id->idReg4(), EA_4BYTE, true); // vv + emitDispImm(emitGetInsSC(id), false); // ix xx + printf("]"); + break; + + // ., [] + case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN + emitDispElementIndex(emitGetInsSC(id), false); // ii + break; + + // {., .}, [] + case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + emitDispPredicateRegPair(id->idReg1(), id->idInsOpt()); // DDDD + emitDispPredicateReg(id->idReg2(), PREDICATE_N, id->idInsOpt(), false); // NNN + emitDispElementIndex(emitGetInsSC(id), false); // i + break; + + // {., .}, , + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. 
-- SVE integer compare scalar count and limit (predicate + // pair) + emitDispLowPredicateRegPair(id->idReg1(), id->idInsOpt()); + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), false); // mmmmm + break; + + // ., , , + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // DDD + emitDispReg(id->idReg2(), id->idOpSize(), true); // nnnnn + emitDispReg(id->idReg3(), id->idOpSize(), true); // mmmmm + emitDispVectorLengthSpecifier(id); + break; + + // PTRUE . + case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), false); // DDD + break; + + // FDUP ., # + // FMOV ., # + case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii + break; + + // DUP ., #{, } + // MOV ., #{, } + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + { + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImmOptsLSL(imm, id->idHasShift(), 8); // h iiiiiiii + break; + } + + // ADD ., ., #{, } + // SQADD ., ., #{, } + // UQADD ., ., #{, } + // SUB ., ., #{, } + // SUBR ., ., #{, } + // SQSUB ., ., #{, } + // UQSUB ., ., #{, } + case IF_SVE_EC_1A: // ........xx...... 
..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) + { + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImmOptsLSL(imm, id->idHasShift(), 8); // h iiiiiiii + break; + } + + // FMOV ., #0.0 + // (Preferred disassembly: FMOV ., #0) + case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(0, false); + break; + + // SMAX ., ., # + // SMIN ., ., # + // UMAX ., ., # + // UMIN ., ., # + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + // MUL ., ., # + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(emitGetInsSC(id), false); // iiiiiiii + break; + + // ., ., . + case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) + // .S, .B, .B + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + break; + } + + // ., ., . 
+ case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long + case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long + case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long + // ., ., . + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + // .Q, .D, .D + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + break; + } + + // ., ., . + case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part + { + const insOpts largeSizeSpecifier = (insOpts)(id->idInsOpt() + 1); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), largeSizeSpecifier, true); // nnnnn + emitDispSveReg(id->idReg3(), largeSizeSpecifier, false); // mmmmm + break; + } + + // ., ., . 
+ case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + break; + } + + // CDOT ., ., ., + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), smallSizeSpecifier, true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, true); // mmmmm + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr + break; + } + + // CMLA ., ., ., + // SQRDCMLAH ., ., ., + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(emitGetInsSC(id)), false); // rr + break; + + // CDOT .S, .B, .B[], + case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + { + const ssize_t imm = emitGetInsSC(id); + const ssize_t rot = (imm & 0b11); + const ssize_t index = (imm >> 2); + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_S, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(index, true); // ii + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr + break; + } + + // CDOT .D, 
.H, .H[], + case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + { + const ssize_t imm = emitGetInsSC(id); + const ssize_t rot = (imm & 0b11); + const ssize_t index = (imm >> 2); + emitDispSveReg(id->idReg1(), INS_OPTS_SCALABLE_D, true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(index, true); // i + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr + break; + } + + // CMLA .H, .H, .H[], + case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + // CMLA .S, .S, .S[], + case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + // SQRDCMLAH .H, .H, .H[], + case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + // SQRDCMLAH .S, .S, .S[], + case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + // FCMLA .S, .S, .S[], + case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) + { + const ssize_t imm = emitGetInsSC(id); + const ssize_t rot = (imm & 0b11); + const ssize_t index = (imm >> 2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(index, true); // i + + // rot specifies a multiple of 90-degree rotations + emitDispImm(emitDecodeRotationImm0_to_270(rot), false); // rr + break; + } + + // .H, /M, .S + // .S, /M, .D + // .D, /M, .S + // .S, /M, .H + // .D, /M, .D + // .S, /M, .S + // .D, /M, .H + // .H, /M, .H + // .H, /M, .D + // .H, /M, .S + case IF_SVE_GQ_3A: // ................ 
...gggnnnnnddddd -- SVE floating-point convert precision odd elements + case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision + case IF_SVE_HO_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision + case IF_SVE_HO_3C: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision + case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer + case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point + { + insOpts opt = id->idInsOpt(); + + switch (ins) + { + // These cases have only one combination of operands so the option may be omitted. + case INS_sve_fcvtxnt: + opt = INS_OPTS_D_TO_S; + break; + case INS_sve_bfcvtnt: + opt = INS_OPTS_S_TO_H; + break; + case INS_sve_fcvtx: + opt = INS_OPTS_D_TO_S; + break; + case INS_sve_bfcvt: + opt = INS_OPTS_S_TO_H; + break; + default: + break; + } + + insOpts dst = INS_OPTS_NONE; + insOpts src = INS_OPTS_NONE; + optExpandConversionPair(opt, dst, src); + + emitDispSveReg(id->idReg1(), dst, true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), src, false); // nnnnn + break; + } + + // { .D }, /Z, [{, #, MUL VL}] + // Some of these formats may allow changing the element size instead of using 'D' for all instructions. 
+ case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + // { .B }, /Z, [{, #}] + // { .H }, /Z, [{, #}] + // { .S }, /Z, [{, #}] + // { .D }, /Z, [{, #}] + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + // { .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q }, /Z, [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, /Z, [{, #, MUL VL}] + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + // { .B, .B }, /Z, 
[{, #, MUL VL}] + // { .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D }, /Z, [{, #, MUL VL}] + // { .B, .B, .B, .B }, /Z, [{, #, MUL VL}] + // { .H, .H, .H, .H }, /Z, [{, #, MUL VL}] + // { .S, .S, .S, .S }, /Z, [{, #, MUL VL}] + // { .D, .D, .D, .D }, /Z, [{, #, MUL VL}] + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + // { .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q }, , [{, #, MUL VL}] + // { .Q, .Q, .Q, .Q }, , [{, #, MUL VL}] + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + // { .B }, , [{, #, MUL VL}] + // { .H }, , [{, #, MUL VL}] + // { .S }, , [{, #, MUL VL}] + // { .D }, , [{, #, MUL VL}] + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + // { .D }, , [{, #, MUL VL}] + // { .Q }, , [{, #, MUL VL}] + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + // { .B, .B }, , [{, #, MUL VL}] + // { .H, .H }, , [{, #, MUL VL}] + // { .S, .S }, , [{, #, MUL VL}] + // { .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D }, , [{, #, MUL VL}] + // { .B, .B, .B, .B }, , [{, #, MUL VL}] + // { .H, .H, .H, .H }, , [{, #, MUL VL}] + // { .S, .S, .S, .S }, , [{, #, MUL VL}] + // { .D, .D, .D, .D }, , [{, #, MUL VL}] + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + imm = emitGetInsSC(id); + 
emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + printf("["); + emitDispReg(id->idReg3(), EA_8BYTE, imm != 0); // nnnnn + if (imm != 0) + { + switch (fmt) + { + case IF_SVE_IO_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(emitGetInsSC(id), false, /* alwaysHex */ true); // iiii + break; + + case IF_SVE_IQ_3A: + case IF_SVE_IS_3A: + case IF_SVE_JE_3A: + case IF_SVE_JO_3A: + // This does not have to be printed as hex. + // We only do it because the capstone disassembly displays this immediate as hex. + // We could not modify capstone without affecting other cases. + emitDispImm(emitGetInsSC(id), true, /* alwaysHex */ true); // iiii + printf("mul vl"); + break; + + default: + emitDispImm(emitGetInsSC(id), true); // iiii + printf("mul vl"); + break; + } + } + printf("]"); + break; + + // {.}, , [, ] + // {.}, , [, , LSL #1] + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.}, , [, , LSL #2] + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.D }, , [, .D, #3] + // {.S }, , [, .S, #1] + // {.S }, , [, .S, #2] + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D, ] + // {.D }, , [, .D, #1] + // {.D }, , [, .D, #2] + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D, ] + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.S }, , [, .S, ] + case IF_SVE_JJ_4A_D: // 
...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D, ] + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + // {.S }, , [, .S, ] + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) + // {.D }, /Z, [, .D, ] + // {.S }, /Z, [, .S, #1] + // {.S }, /Z, [, .S, #2] + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.S }, /Z, [, .S, ] + // {.D }, /Z, [, .D, #1] + // {.D }, /Z, [, .D, #2] + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.D }, /Z, [, .D, ] + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.S }, /Z, [, .S, ] + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.D }, /Z, [, .D, #2] + // {.D }, /Z, [, .D, #3] + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D, ] + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D, ] + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D] + // {.D }, /Z, [, .D, LSL #1] + // {.D }, /Z, [, .D, LSL #2] + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.D }, /Z, [, .D] + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // 
offsets) + // {.S }, /Z, [.S{, }] + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + // {.D }, /Z, [.D{, }] + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + // {.D }, /Z, [{, , LSL #3}] + // {.D }, /Z, [{, , LSL #2}] + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + // {.H }, /Z, [{, }] + // {.S }, /Z, [{, }] + // {.D }, /Z, [{, }] + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.B }, /Z, [{, }] + // {.H }, /Z, [{, }] + // {.S }, /Z, [{, }] + // {.D }, /Z, [{, }] + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.S }, /Z, [{, , LSL #1}] + // {.D }, /Z, [{, , LSL #1}] + // {.S }, /Z, [{, , LSL #2}] + // {.D }, /Z, [{, , LSL #2}] + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.H }, /Z, [{, , LSL #1}] + // {.S }, /Z, [{, , LSL #1}] + // {.D }, /Z, [{, , LSL #1}] + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.D }, /Z, [, , LSL #3] + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.Q }, /Z, [, , LSL #3] + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.D }, /Z, [, , LSL #2] + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.D }, /Z, [, , LSL #2 + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IK_4A_F: // 
...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.S }, /Z, [, , LSL #1] + // {.D }, /Z, [, , LSL #1] + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.H }, /Z, [, , LSL #1] + // {.S }, /Z, [, , LSL #1] + // {.D }, /Z, [, , LSL #1] + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + // {.Q, .Q }, /Z, [, , LSL #4] + // {.Q, .Q, .Q }, /Z, [, , LSL #4] + // {.Q, .Q, .Q, .Q }, /Z, [, , LSL #4] + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + // {.B, .B }, /Z, [, ] + // {.H, .H }, /Z, [, , LSL #1] + // {.S, .S }, /Z, [, , LSL #2] + // {.D, .D }, /Z, [, , LSL #3] + // {.B, .B, .B }, /Z, [, ] + // {.H, .H, .H }, /Z, [, , LSL #1] + // {.S, .S, .S }, /Z, [, , LSL #2] + // {.D, .D, .D }, /Z, [, , LSL #3] + // {.B, .B, .B, .B }, /Z, [, ] + // {.H, .H, .H, .H }, /Z, [, , LSL #1] + // {.S, .S, .S, .S }, /Z, [, , LSL #2] + // {.D, .D, .D, .D }, /Z, [, , LSL #3] + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + // {.D }, /Z, [, .D, LSL #2] + // {.D }, /Z, [, .D, LSL #3] + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, 
.D] + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D] + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.Q }, /Z, [.D{, }] + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + // {.D }, /Z, [.D{, }] + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + // {.Q }, , [.D{, }] + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + // {.S }, , [.S{, }] + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + // {.D }, , [.D{, }] + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + // {.D }, , [.D{, }] + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + // {.B }, , [, ] + // {.H }, , [, , LSL #1] + // {.S }, , [, , LSL #2] + // {.D }, , [, , LSL #3] + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + // {.B, .B }, , [, ] + // {.H, .H }, , [, , LSL #1] + // {.S, .S }, , [, , LSL #2] + // {.D, .D }, , [, , LSL #3] + // {.B, .B, .B }, , [, ] + // {.H, .H, .H }, , [, , LSL #1] + // {.S, .S, .S }, , [, , LSL #2] + // {.D, .D, .D }, , [, , LSL #3] + // {.B, .B, .B, .B }, , [, ] + // {.H, .H, .H, .H }, , [, , LSL #1] + // {.S, .S, .S, .S }, , [, , LSL #2] + // {.D, .D, .D, .D }, , [, , LSL #3] + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + // {.Q }, , [, , LSL #2] + // {.D }, , [, , LSL #3] + case IF_SVE_JD_4C: // ...........mmmmm 
...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.Q }, , [, , LSL #3] + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.Q, .Q }, , [, , LSL #4] + // {.Q, .Q, .Q }, , [, , LSL #4] + // {.Q, .Q, .Q, .Q }, , [, , LSL #4] + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + // {.D }, , [, .D, LSL #1] + // {.D }, , [, .D, LSL #2] + // {.D }, , [, .D, LSL #3] + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D] + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D] + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D] + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), id->idInsOpt(), fmt); // nnnnn + // mmmmm + break; + + // {.}, , [{, #, MUL VL}] + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveImmMulVl(id->idReg3(), imm); + break; + + // {.}, , [{, #, MUL VL}] + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + imm = emitGetInsSC(id); + 
emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveImmMulVl(id->idReg3(), imm); + break; + + // , [{, #, MUL VL}] + case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register + // , [{, #, MUL VL}] + case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register + imm = emitGetInsSC(id); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); // TTTT + emitDispSveImmMulVl(id->idReg2(), imm); + break; + + // , [{, #, MUL VL}] + case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register + // , [{, #, MUL VL}] + case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register + imm = emitGetInsSC(id); + emitDispReg(id->idReg1(), EA_SCALABLE, true); // ttttt + emitDispSveImmMulVl(id->idReg2(), imm); + break; + + // ., /M, ., ., + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitDecodeRotationImm90_or_270(imm), false); + break; + + // ., /M, ., ., + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveReg(id->idReg4(), id->idInsOpt(), true); + emitDispImm(emitDecodeRotationImm0_to_270(imm), false); + break; + + // ., /Z, ., #0.0 + case IF_SVE_HI_3A: // 
........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispFloatZero(); + break; + + // ., /M, ., + case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSmallFloatImm(imm, id->idIns()); + break; + + // ., ., ., # + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + + // ., /M, . + case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); + break; + + // .H, /M, .H, .H + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + // ., /M, ., . 
+ case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); + break; + + // .B, { .B }, [] + case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + // .B, { .B }, [] + case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + // .H, { .H }, [] + case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + // .H, { .H, .H }, [] + case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + // .H, {.H }, [] + case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveConsecutiveRegList(id->idReg1(), 1, id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); + emitDispElementIndex(imm, false); + break; + + // , , [, .S, ] + // , , [, .S, #1] + // , , [, .S, #2] + // , , [, .S, #3] + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + // , , [, .D, ] + // , , [, .D, #1] + // , , [, .D, #2] + // , , [, .D, #3] + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + // , , [, .D] + // , , [, .D, LSL #1] + // , , [, .D, LSL #2] + // , , [, .D, LSL #3] + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled 
+ // offsets) + // , , [, ] + // , , [, , LSL #1] + // , , [, , LSL #2] + // , , [, , LSL #3] + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + emitDispSvePrfop(id->idSvePrfop(), true); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveModAddr(ins, id->idReg2(), id->idReg3(), id->idInsOpt(), fmt); + break; + + // , , [.S{, #}] + // , , [.D{, #}] + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + imm = emitGetInsSC(id); + emitDispSvePrfop(id->idSvePrfop(), true); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveImm(id->idReg2(), imm, id->idInsOpt()); + break; + + // , , [{, #, MUL VL}] + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + imm = emitGetInsSC(id); + emitDispSvePrfop(id->idSvePrfop(), true); + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveImmMulVl(id->idReg2(), imm); + break; + + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.S }, /Z, [.S{, #}] + // {.D }, /Z, [.D{, #}] + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + // {.D }, /Z, [.D{, #}] + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + // {.S }, , [.S{, #}] + // {.D }, , [.D{, #}] + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + // {.D }, , [.D{, #}] + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element 
+ // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + // {.D }, /Z, [{, #}] + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + imm = emitGetInsSC(id); + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(id->idIns()), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveImmIndex(id->idReg3(), id->idInsOpt(), imm); + break; + + // , + case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); + break; + + // ., + case IF_SVE_CB_2A: // ........xx...... ......nnnnnddddd -- SVE broadcast general register + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispReg(encodingZRtoSP(id->idReg2()), size, false); + break; + + // .H, .B + case IF_SVE_HH_2A: // ................ ......nnnnnddddd -- SVE2 FP8 upconverts + // ., . + case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), (insOpts)((unsigned)id->idInsOpt() - 1), false); + break; + + // ., . + case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator + // ., . + case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements + // ., . + case IF_SVE_HF_2A: // ........xx...... 
......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); + break; + + // ., ., # + case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) + // ., ., # + case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert + // ., ., # + case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispImm(imm, false); + break; + + // ., /Z, #{, } + // ., /M, #{, } + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + { + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg + emitDispImmOptsLSL(imm, id->idHasShift(), 8); // iiiiiiii, h + break; + } + + // ., /M, # + case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // gggg + emitDispImm(0, false); + break; + + // ., .[] + // ., + case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + if (imm > 0) + { + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); // nnnnn + emitDispElementIndex(imm, false); + } + else + { + assert(imm == 0); + emitDispReg(id->idReg2(), optGetSveElemsize(id->idInsOpt()), false); + } + break; + + // ., .[] + case IF_SVE_BX_2A: // 
...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); + emitDispElementIndex(imm, false); + break; + + // .B, .B, .B, # + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispImm(imm, false); + break; + + default: + printf("unexpected format %s", emitIfName(id->idInsFmt())); + assert(!"unexpectedFormat"); + break; + } +} + +#if defined(DEBUG) || defined(LATE_DISASM) +//---------------------------------------------------------------------------------------- +// getInsSveExecutionCharacteristics: +// Returns the current SVE instruction's execution characteristics +// +// Arguments: +// id - The current instruction descriptor to be evaluated +// result - out parameter for execution characteristics struct +// (only insLatency and insThroughput will be set) +// +// Notes: +// SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3 +// +void emitter::getInsSveExecutionCharacteristics(instrDesc* id, insExecutionCharacteristics& result) +{ + instruction ins = id->idIns(); + switch (id->idInsFmt()) + { + // Predicate logical + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, basic + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + // Max/min, basic and pairwise + case IF_SVE_AD_3A: // ........xx...... 
...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Divides, 32 bit (Note: worse for 64 bit) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 + result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7 + break; + + // Multiply, B, H, S element size (Note: D element size is slightly slower) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Reduction, logical + case IF_SVE_AF_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_6C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_AH_3A: // ........xx.....M ...gggnnnnnddddd -- SVE constructive prefix (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + // Reduction, arithmetic, D form (worse for B, S and H) + case IF_SVE_AI_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (predicated) + // Reduction, arithmetic, D form (worse for B, S and H) + case IF_SVE_AK_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_AM_2A: // ........xx...... 
...gggxxiiiddddd -- SVE bitwise shift by immediate (predicated) + switch (ins) + { + case INS_sve_asr: + case INS_sve_lsl: + case INS_sve_lsr: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_srshr: + case INS_sve_sqshl: + case INS_sve_urshr: + case INS_sve_sqshlu: + case INS_sve_uqshl: + case INS_sve_asrd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + // Arithmetic, shift + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Count/reverse bits + // Arithmetic, basic + // Floating point absolute value/difference + // Floating point arithmetic + // Logical + case IF_SVE_AP_3A: // ........xx...... 
...gggnnnnnddddd -- SVE bitwise unary operations (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case IF_SVE_AQ_3A: + switch (ins) + { + // Arithmetic, basic + case INS_sve_abs: + case INS_sve_neg: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Extend, sign or zero + case INS_sve_sxtb: + case INS_sve_sxth: + case INS_sve_sxtw: + case INS_sve_uxtb: + case INS_sve_uxth: + case INS_sve_uxtw: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_AR_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE integer multiply-accumulate writing addend + // (predicated) + case IF_SVE_AS_4A: // ........xx.mmmmm ...gggaaaaaddddd -- SVE integer multiply-add writing multiplicand + // (predicated) + case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high + // (unpredicated) + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FF_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FF_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FF_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply-add (indexed) + case IF_SVE_FI_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FI_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + 
case IF_SVE_FI_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply high (indexed) + case IF_SVE_FK_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_FK_3B: // ...........iimmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_FK_3C: // ...........immmm ......nnnnnddddd -- SVE2 saturating multiply-add high (indexed) + case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + + case IF_SVE_GU_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GU_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_GX_3A: // ...........iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_GX_3B: // ...........immmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_GY_3B: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + switch (ins) + { + case INS_sve_fdot: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bfdot: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HA_3A: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + switch (ins) + { + case INS_sve_fdot: + result.insThroughput = 
PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bfdot: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HB_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating-point multiply-add long + switch (ins) + { + case INS_sve_fmlalb: + case INS_sve_fmlalt: + case INS_sve_fmlslb: + case INS_sve_fmlslt: + case INS_sve_bfmlalb: + case INS_sve_bfmlalt: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_bfmlslb: + case INS_sve_bfmlslt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_AV_3A: // ...........mmmmm ......kkkkkddddd -- SVE2 bitwise ternary operations + switch (ins) + { + case INS_sve_eor3: + case INS_sve_bcax: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_bsl: + case INS_sve_bsl1n: + case INS_sve_bsl2n: + case INS_sve_nbsl: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_GU_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply-add (indexed) + case IF_SVE_GX_3C: // .........i.iimmm ......nnnnnddddd -- SVE floating-point multiply (indexed) + case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd 
-- sve_int_perm_tbxquads + case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp + case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) + case IF_SVE_GW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_AT_3B: // ...........mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_AB_3B: // ................ ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_HL_3B: // ................ ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + case IF_SVE_GO_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long long + case IF_SVE_GW_3B: // ...........mmmmm ......nnnnnddddd -- SVE FP clamp + case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_HA_3A_F: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product + case IF_SVE_HD_3A_A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + case IF_SVE_HK_3B: // ...........mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_AT_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer add/subtract vectors (unpredicated) + case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + case 
IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + case IF_SVE_GC_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract narrow high part + case IF_SVE_GF_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 histogram generation (segment) + case IF_SVE_AU_3A: // ...........mmmmm ......nnnnnddddd -- SVE bitwise logical operations (unpredicated) + case IF_SVE_GI_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE2 histogram generation (vector) + case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment + case IF_SVE_BC_1A: // ................ .....iiiiiiddddd -- SVE stack frame size + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + switch (ins) + { + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_pmullb: + case INS_sve_pmullt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register + // increment) + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index 
generation (immediate start, register + // increment) + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_8C; + break; + + case IF_SVE_BH_3A: // .........x.mmmmm ....hhnnnnnddddd -- SVE address generation + case IF_SVE_BH_3B: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + case IF_SVE_BH_3B_A: // ...........mmmmm ....hhnnnnnddddd -- SVE address generation + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + case IF_SVE_BO_1A: // ...........Xiiii ......pppppddddd -- SVE saturating inc/dec register by element count + case IF_SVE_BP_1A: // ............iiii ......pppppddddd -- SVE saturating inc/dec vector by element count + case IF_SVE_BQ_2A: // ...........iiiii ...iiinnnnnddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BQ_2B: // ...........iiiii ...iiimmmmmddddd -- SVE extract vector (immediate offset, destructive) + case IF_SVE_BU_2A: // ........xx..gggg ...iiiiiiiiddddd -- SVE copy floating-point immediate (predicated) + case IF_SVE_BS_1A: // ..............ii iiiiiiiiiiiddddd -- SVE bitwise logical with immediate (unpredicated) + case IF_SVE_BT_1A: // ..............ii iiiiiiiiiiiddddd -- SVE broadcast bitmask immediate + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate + 
result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_BV_2A: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2A_J: // ........xx..gggg ..hiiiiiiiiddddd -- SVE copy integer immediate (predicated) + case IF_SVE_BV_2B: // ........xx..gggg ...........ddddd -- SVE copy integer immediate (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_BW_2A: // ........ii.xxxxx ......nnnnnddddd -- SVE broadcast indexed element + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_CE_2A: // ................ ......nnnnn.DDDD -- SVE move predicate from vector + case IF_SVE_CE_2B: // .........i...ii. ......nnnnn.DDDD -- SVE move predicate from vector + case IF_SVE_CE_2C: // ..............i. ......nnnnn.DDDD -- SVE move predicate from vector + case IF_SVE_CE_2D: // .............ii. ......nnnnn.DDDD -- SVE move predicate from vector + case IF_SVE_CF_2A: // ................ .......NNNNddddd -- SVE move predicate into vector + case IF_SVE_CF_2B: // .........i...ii. .......NNNNddddd -- SVE move predicate into vector + case IF_SVE_CF_2C: // ..............i. .......NNNNddddd -- SVE move predicate into vector + case IF_SVE_CF_2D: // .............ii. 
.......NNNNddddd -- SVE move predicate into vector + result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo currently undocumented + result.insLatency = PERFSCORE_LATENCY_140C; + break; + + case IF_SVE_CC_2A: // ........xx...... ......mmmmmddddd -- SVE insert SIMD&FP scalar register + case IF_SVE_CD_2A: // ........xx...... ......mmmmmddddd -- SVE insert general register + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + + case IF_SVE_CI_3A: // ........xx..MMMM .......NNNN.DDDD -- SVE permute predicate elements + case IF_SVE_CJ_2A: // ........xx...... .......NNNN.DDDD -- SVE reverse predicate elements + case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + // Conditional extract operations, SIMD&FP scalar and vector forms + case IF_SVE_CL_3A: // ........xx...... ...gggnnnnnddddd -- SVE compress active elements + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Conditional extract operations, scalar form + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + result.insLatency = PERFSCORE_LATENCY_8C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Copy, scalar SIMD&FP or imm + case IF_SVE_CP_3A: // ........xx...... ...gggnnnnnddddd -- SVE copy SIMD&FP scalar register to vector + // (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Copy, scalar + case IF_SVE_CQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE copy general register to vector (predicated) + result.insLatency = PERFSCORE_LATENCY_5C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_CT_3A: // ................ ...gggnnnnnddddd -- SVE reverse doublewords + result.insThroughput = PERFSCORE_THROUGHPUT_140C; // @ToDo Currently undocumented. + result.insLatency = PERFSCORE_LATENCY_140C; + break; + + case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive) + case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive) + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_CW_4A: // ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CY_3A: // ........xx.iiiii ...gggnnnnn.DDDD -- SVE integer compare with signed immediate + case IF_SVE_CY_3B: // ........xx.iiiii ii.gggnnnnn.DDDD -- SVE integer compare with unsigned immediate + case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) + case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_EY_3B: // ...........immmm ......nnnnnddddd -- SVE integer dot product (indexed) + case IF_SVE_FE_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FE_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply long (indexed) + case IF_SVE_FG_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FG_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 integer multiply-add long (indexed) + case IF_SVE_FH_3A: // 
...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FH_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply (indexed) + case IF_SVE_FJ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_FJ_3B: // ...........immmm ....i.nnnnnddddd -- SVE2 saturating multiply-add (indexed) + case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) + case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long + case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long + case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_GJ_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 crypto constructive binary operations + switch (ins) + { + case INS_sve_rax1: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_sm4ekey: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_GZ_3A: // ...........iimmm ....i.nnnnnddddd -- SVE floating-point multiply-add long (indexed) + switch (ins) + { + case INS_sve_fmlalb: + case INS_sve_fmlalt: + case INS_sve_fmlslb: + case INS_sve_fmlslt: + case INS_sve_bfmlalb: + case INS_sve_bfmlalt: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_bfmlslb: + case INS_sve_bfmlslt: + 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_CZ_4A_A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_CZ_4A_K: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + case IF_SVE_CZ_4A_L: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations + switch (ins) + { + case INS_sve_mov: + case INS_sve_and: + case INS_sve_orr: + case INS_sve_eor: + case INS_sve_bic: + case INS_sve_orn: + case INS_sve_not: + case INS_sve_sel: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case INS_sve_bics: + case INS_sve_eors: + case INS_sve_nots: + case INS_sve_ands: + case INS_sve_orrs: + case INS_sve_orns: + case INS_sve_nors: + case INS_sve_nands: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_sve_nor: + case INS_sve_nand: + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_sve_movs: + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition + case IF_SVE_DC_3A: // ................ 
..gggg.NNNN.MMMM -- SVE propagate break to next partition + switch (ins) + { + case INS_sve_brkpa: + case INS_sve_brkpb: + case INS_sve_brkn: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_sve_brkpas: + case INS_sve_brkpbs: + case INS_sve_brkns: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_DB_3A: // ................ ..gggg.NNNNMDDDD -- SVE partition break condition + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DB_3B: // ................ ..gggg.NNNN.DDDD -- SVE partition break condition + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DE_1A: // ........xx...... ......ppppp.DDDD -- SVE predicate initialize + switch (ins) + { + case INS_sve_ptrue: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case INS_sve_ptrues: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_DF_2A: // ........xx...... .......VVVV.DDDD -- SVE predicate next active + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DG_2A: // ................ 
.......gggg.DDDD -- SVE predicate read from FFR (predicated) + switch (ins) + { + case INS_sve_rdffr: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_sve_rdffrs: + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_DH_1A: // ................ ............DDDD -- SVE predicate read from FFR (unpredicated) + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_DJ_1A: // ................ ............DDDD -- SVE predicate zero + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DI_2A: // ................ ..gggg.NNNN..... -- SVE predicate test + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_DK_3A: // ........xx...... ..gggg.NNNNddddd -- SVE predicate count + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Extract/insert operation, SIMD and FP scalar form + case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Extract/insert operation, scalar + case IF_SVE_CS_3A: // ........xx...... 
...gggnnnnnddddd -- SVE extract element to general register + result.insLatency = PERFSCORE_LATENCY_5C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Count/reverse bits + // Reverse, vector + case IF_SVE_CU_3A: // ........xx...... ...gggnnnnnddddd -- SVE reverse within elements + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Arithmetic, pairwise add + // Max/min, basic and pairwise + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case IF_SVE_ES_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer unary operations (predicated) + switch (ins) + { + // Arithmetic, complex + case INS_sve_sqabs: + case INS_sve_sqneg: + // Reciprocal estimate + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + // Reciprocal estimate + case INS_sve_urecpe: + case INS_sve_ursqrte: + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + // Arithmetic, complex + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Arithmetic, shift complex + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + // Arithmetic, pairwise add and accum long + case IF_SVE_EQ_3A: // ........xx...... 
...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long + case IF_SVE_EF_3A: // ...........mmmmm ......nnnnnddddd -- SVE two-way dot product + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + // Floating point arithmetic + // Floating point min/max pairwise + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point reduction, F64. (Note: Worse for F32 and F16) + case IF_SVE_HE_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + // Floating point associative add, F64. (Note: Worse for F32 and F16) + case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case IF_SVE_HK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point arithmetic (unpredicated) + switch (ins) + { + case INS_sve_frecps: + case INS_sve_frsqrts: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case INS_sve_fmul: + case INS_sve_ftsmul: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case INS_sve_fadd: + case INS_sve_fsub: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + switch (ins) + { + // Floating point absolute value/difference + case INS_sve_fabd: + // Floating point min/max + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + // Floating point arithmetic + case INS_sve_fadd: + case INS_sve_fsub: + case INS_sve_fsubr: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point divide, F64 (Note: Worse for F32, F16) + case INS_sve_fdiv: + case INS_sve_fdivr: + result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 + result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7 + break; + + // Floating point multiply + case INS_sve_fmul: + case INS_sve_fmulx: + case INS_sve_fscale: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case INS_sve_famax: + case INS_sve_famin: + result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + break; + + 
default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HO_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision + case IF_SVE_HO_3B: + case IF_SVE_HO_3C: + case IF_SVE_HP_3B: // ................ ...gggnnnnnddddd -- SVE floating-point convert to integer + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + // Floating point round to integral, F64. (Note: Worse for F32 and F16) + case IF_SVE_HQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point round to integral value + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case IF_SVE_HR_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point unary operations + switch (ins) + { + // Floating point reciprocal estimate, F64. (Note: Worse for F32 and F16) + case INS_sve_frecpx: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_1C; + break; + + // Floating point square root F64. (Note: Worse for F32 and F16) + case INS_sve_fsqrt: + result.insThroughput = PERFSCORE_THROUGHPUT_16C; + result.insLatency = PERFSCORE_LATENCY_14C; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HS_3A: // ................ ...gggnnnnnddddd -- SVE integer convert to floating-point + result.insThroughput = PERFSCORE_THROUGHPUT_4X; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_DL_2A: // ........xx...... .....l.NNNNddddd -- SVE predicate count (predicate-as-counter) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_DM_2A: // ........xx...... .......MMMMddddd -- SVE inc/dec register by predicate count + case IF_SVE_DN_2A: // ........xx...... 
.......MMMMddddd -- SVE inc/dec vector by predicate count + case IF_SVE_DP_2A: // ........xx...... .......MMMMddddd -- SVE saturating inc/dec vector by predicate count + case IF_SVE_DO_2A: // ........xx...... .....X.MMMMddddd -- SVE saturating inc/dec register by predicate count + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + + case IF_SVE_DQ_0A: // ................ ................ -- SVE FFR initialise + case IF_SVE_DR_1A: // ................ .......NNNN..... -- SVE FFR write from predicate + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_DW_2A: // ........xx...... ......iiNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + case IF_SVE_DW_2B: // ........xx...... .......iNNN.DDDD -- SVE extract mask predicate from predicate-as-counter + case IF_SVE_DS_2A: // .........x.mmmmm ......nnnnn..... -- SVE conditionally terminate scalars + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_1C; + break; + + case IF_SVE_DV_4A: // ........ix.xxxvv ..NNNN.MMMM.DDDD -- SVE broadcast predicate element + case IF_SVE_FZ_2A: // ................ ......nnnn.ddddd -- SME2 multi-vec extract narrow + case IF_SVE_GY_3A: // ...........iimmm ....i.nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + case IF_SVE_GY_3B_D: // ...........iimmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product (indexed) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_HG_2A: // ................ 
......nnnn.ddddd -- SVE2 FP8 downconverts + switch (ins) + { + case INS_sve_fcvtnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + case INS_sve_fcvtn: + case INS_sve_bfcvtn: + case INS_sve_fcvtnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + // Not available in Arm Neoverse N2 Software Optimization Guide. + case IF_SVE_AG_3A: // ........xx...... ...gggnnnnnddddd -- SVE bitwise logical reduction (quadwords) + case IF_SVE_AJ_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer add reduction (quadwords) + case IF_SVE_AL_3A: // ........xx...... ...gggnnnnnddddd -- SVE integer min/max reduction (quadwords) + case IF_SVE_GS_3A: // ........xx...... ...gggnnnnnddddd -- SVE floating-point recursive reduction (quadwords) + result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + break; + + // Not available in Arm Neoverse N2 Software Optimization Guide. + case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow + result.insThroughput = PERFSCORE_THROUGHPUT_25C; // TODO-SVE: Placeholder + result.insLatency = PERFSCORE_LATENCY_20C; // TODO-SVE: Placeholder + break; + + case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow + case IF_SVE_FA_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + case IF_SVE_FA_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer dot product (indexed) + case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_GK_2A: // ................ 
......mmmmmddddd -- SVE2 crypto destructive binary operations + case IF_SVE_GL_1A: // ................ ...........ddddd -- SVE2 crypto unary operations + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_DT_3A: // ........xx.mmmmm ...X..nnnnn.DDDD -- SVE integer compare scalar count and limit + case IF_SVE_DX_3A: // ........xx.mmmmm ......nnnnn.DDD. -- SVE integer compare scalar count and limit (predicate + // pair) + case IF_SVE_DY_3A: // ........xx.mmmmm ..l...nnnnn..DDD -- SVE integer compare scalar count and limit + // (predicate-as-counter) + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue + case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) + case IF_SVE_EB_1B: // ........xx...... ...........ddddd -- SVE broadcast integer immediate (unpredicated) + case IF_SVE_FV_2A: // ........xx...... .....rmmmmmddddd -- SVE2 complex integer add + case IF_SVE_FY_3A: // .........x.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long with carry + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_ED_1A: // ........xx...... 
...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + switch (ins) + { + case INS_sve_umin: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + break; + + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + case IF_SVE_FB_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + case IF_SVE_FB_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add (indexed) + case IF_SVE_FC_3A: // ...........iimmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + case IF_SVE_FC_3B: // ...........immmm ....rrnnnnnddddd -- SVE2 complex saturating multiply-add (indexed) + case IF_SVE_EK_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer multiply-add + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IH_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus + // immediate) + case IF_SVE_IJ_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_E: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_F: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) + case IF_SVE_IJ_3A_G: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (scalar plus immediate) 
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IL_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus immediate) + case IF_SVE_IL_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_B: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + case IF_SVE_IL_3A_C: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-fault load (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_IM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus + // immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IO_3A: // ............iiii ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld1rqb: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rob: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1roh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqw: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1row: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqd: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency 
= PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rod: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IQ_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // immediate) + switch (ins) + { + case INS_sve_ld2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IS_3A: // ............iiii ...gggnnnnnttttt -- SVE load multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_ld2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3b: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3h: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = 
PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3w: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3d: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JE_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // immediate) + switch (ins) + { + case INS_sve_st2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long + case IF_SVE_JM_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // immediate) + case IF_SVE_JN_3C: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3C_D: // ............iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = 
PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_GB_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow + switch (ins) + { + case INS_sve_sqshrunb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqshrunt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqrshrunb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqrshrunt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_shrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_shrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_rshrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_rshrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqshrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqshrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqrshrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_sqrshrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_uqshrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_uqshrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_uqrshrnb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; 
+ result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_uqrshrnt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JO_3A: // ............iiii ...gggnnnnnttttt -- SVE store multiple structures (scalar plus immediate) + switch (ins) + { + case INS_sve_st2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4b: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3h: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4h: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4w: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4d: + result.insThroughput = PERFSCORE_THROUGHPUT_9C; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, 
&result); + break; + } + break; + + case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JJ_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_C: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4A_D: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4A: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit + // unscaled offsets) + case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + case IF_SVE_JN_3B: // ..........x.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case 
IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous 
load (scalar plus scalar) + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + switch (ins) + { + case INS_sve_ld1rqb: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rob: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1roh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqw: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1row: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqd: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rod: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = 
PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + switch (ins) + { + case INS_sve_ld2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + switch (ins) + { + case INS_sve_ld2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; 
+ case INS_sve_ld4w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + switch (ins) + { + case INS_sve_ld1q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + switch (ins) + { + case INS_sve_st1q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // 
need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + switch (ins) + { + case INS_sve_st2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4b: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4h: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3w: + result.insThroughput = 
PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4w: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4d: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + switch (ins) + { + case INS_sve_st2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = 
PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + case IF_SVE_EI_3A: // ...........mmmmm ......nnnnnddddd -- SVE mixed sign dot product + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_GV_3A: // ...........immmm ....rrnnnnnddddd -- SVE floating-point complex multiply-add (indexed) + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + case IF_SVE_HD_3A: // ...........mmmmm ......nnnnnddddd -- SVE floating point matrix multiply accumulate + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + switch (ins) + { + case INS_sve_fmul: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + default: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + break; + + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_HP_3A: // .............xx. 
...gggnnnnnddddd -- SVE floating-point convert to integer + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + switch (ins) + { + case INS_sve_bfmla: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case INS_sve_bfmls: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + case IF_SVE_HU_4A: // ........xx.mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register + case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register + case IF_SVE_JH_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE store vector register + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_GG_3A: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_GH_3B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit 
+ // element size + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_GH_3B_B: // ........ii.mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_GG_3B: // ........ii.mmmmm ...i..nnnnnddddd -- SVE2 lookup table with 2-bit indices and 16-bit + // element size + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_GH_3A: // ........i..mmmmm ......nnnnnddddd -- SVE2 lookup table with 4-bit indices and 16-bit + // element size + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_HY_3A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HY_3A_A: // .........h.mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit + // scaled offsets) + switch (ins) + { + case INS_sve_prfb: + 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HY_3B: // ...........mmmmm ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (scalar plus 32-bit scaled + // offsets) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IB_3A: // ...........mmmmm ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus scalar) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix 
+ break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HZ_2A_B: // ...........iiiii ...gggnnnnn.oooo -- SVE 32-bit gather prefetch (vector plus immediate) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IA_2A: // ..........iiiiii ...gggnnnnn.oooo -- SVE contiguous prefetch (scalar plus immediate) + switch (ins) + { + case INS_sve_prfb: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfw: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_prfd: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = 
PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HX_3A_B: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_HX_3A_E: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit gather load (vector plus immediate) + case IF_SVE_IV_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit gather load (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_JI_3A_A: // ...........iiiii ...gggnnnnnttttt -- SVE 32-bit scatter store (vector plus immediate) + case IF_SVE_JL_3A: // ...........iiiii ...gggnnnnnttttt -- SVE 64-bit scatter store (vector plus immediate) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_IC_3A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_A: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_B: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + case IF_SVE_IC_3A_C: // ..........iiiiii ...gggnnnnnttttt -- SVE load and broadcast element + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_BI_2A: // ................ ......nnnnnddddd -- SVE constructive prefix (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HH_2A: // ................ 
......nnnnnddddd -- SVE2 FP8 upconverts + switch (ins) + { + case INS_sve_f1cvt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_f2cvt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bf1cvt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bf2cvt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_f1cvtlt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_f2cvtlt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bf1cvtlt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_bf2cvtlt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_BJ_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point exponential accelerator + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_CB_2A: // ........xx...... 
......nnnnnddddd -- SVE broadcast general register + switch (ins) + { + case INS_sve_mov: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_dup: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_CG_2A: // ........xx...... ......nnnnnddddd -- SVE reverse vector elements + switch (ins) + { + case INS_sve_rev: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_CH_2A: // ........xx...... ......nnnnnddddd -- SVE unpack vector elements + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HF_2A: // ........xx...... ......nnnnnddddd -- SVE floating-point reciprocal estimate (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_BF_2A: // ........xx.xxiii ......nnnnnddddd -- SVE bitwise shift by immediate (unpredicated) + case IF_SVE_FT_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift and insert + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_FU_2A: // ........xx.xxiii ......nnnnnddddd -- SVE2 bitwise shift right and accumulate + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } +} +#endif // defined(DEBUG) || defined(LATE_DISASM) + #endif // TARGET_ARM64