diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 4d1a2fd36f43f..d1784e019703f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5431,9 +5431,12 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) * disassembler thinks the instructions as the same as we do. */ -// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here. +// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" and one or more of the category defines below to run those unit tests here. // After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time. -//#define ALL_ARM64_EMITTER_UNIT_TESTS +// #define ALL_ARM64_EMITTER_UNIT_TESTS +// #define ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL +// #define ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD +// #define ALL_ARM64_EMITTER_UNIT_TESTS_SVE #if defined(DEBUG) void CodeGen::genArm64EmitterUnitTests() @@ -5450,7 +5453,7 @@ void CodeGen::genArm64EmitterUnitTests() emitter* theEmitter = GetEmitter(); #endif // ALL_ARM64_EMITTER_UNIT_TESTS -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // We use this: // genDefineTempLabel(genCreateTempLabel()); // to create artificial labels to help separate groups of tests. 
@@ -5555,9 +5558,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_stlxrb, EA_4BYTE, REG_R8, REG_R5, REG_R14); theEmitter->emitIns_R_R_R(INS_stlxrh, EA_4BYTE, REG_R12, REG_R3, REG_R15); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -5770,9 +5773,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_8BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_tbx_4regs, EA_16BYTE, REG_V0, REG_V1, REG_V6, INS_OPTS_16B); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -5953,9 +5956,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_ld4r, EA_8BYTE, REG_V4, REG_R8, REG_R9, INS_OPTS_1D); theEmitter->emitIns_R_R_R(INS_ld4r, EA_16BYTE, REG_V10, REG_R14, REG_R15, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6136,9 +6139,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_ld4r, EA_8BYTE, REG_V30, REG_R2, 32, INS_OPTS_1D); theEmitter->emitIns_R_R_I(INS_ld4r, EA_16BYTE, REG_V3, REG_R7, 32, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6193,9 +6196,9 @@ void CodeGen::genArm64EmitterUnitTests() 
theEmitter->emitIns_R_R_I(INS_st4, EA_4BYTE, REG_V10, REG_R14, 0); theEmitter->emitIns_R_R_I(INS_st4, EA_8BYTE, REG_V15, REG_R19, 1); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6250,9 +6253,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_st4, EA_4BYTE, REG_V12, REG_R16, REG_R17, 0, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_st4, EA_8BYTE, REG_V18, REG_R22, REG_R23, 1, INS_OPTS_POST_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Loads to and Stores from one, two, three, or four SIMD&FP registers // @@ -6307,9 +6310,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I_I(INS_st4, EA_4BYTE, REG_V10, REG_R14, 0, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_I_I(INS_st4, EA_8BYTE, REG_V15, REG_R19, 1, 32, INS_OPTS_POST_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Compares // @@ -6341,9 +6344,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL); theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R // @@ -6362,9 +6365,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5); theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL 
-#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_I // @@ -6415,9 +6418,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000); theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R // @@ -6452,9 +6455,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_Mov(INS_uxtb, EA_4BYTE, REG_R3, REG_R13, /* canSkip */ false); theEmitter->emitIns_Mov(INS_uxth, EA_4BYTE, REG_R2, REG_R14, /* canSkip */ false); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_I_I // @@ -6473,9 +6476,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL); theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I // @@ -6583,9 +6586,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL); theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I cmp/txt // @@ -6647,9 +6650,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH); theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // 
ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R // @@ -6702,9 +6705,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10); theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // ARMv8.1 LSE Atomics // @@ -6771,9 +6774,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_staddl, EA_4BYTE, REG_R8, REG_R10); theEmitter->emitIns_R_R(INS_stadd, EA_8BYTE, REG_R8, REG_R10); theEmitter->emitIns_R_R(INS_staddl, EA_8BYTE, REG_R8, REG_R10); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_I_I // @@ -6804,9 +6807,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9); theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_I // @@ -6913,7 +6916,7 @@ void CodeGen::genArm64EmitterUnitTests() #endif // ALL_ARM64_EMITTER_UNIT_TESTS -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_I -- load/store pair // @@ -6959,9 +6962,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX); theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef 
ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_Ext -- load/store shifted/extend // @@ -7086,9 +7089,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // R_R_R_R // @@ -7109,9 +7112,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23); theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_COND // @@ -7147,9 +7150,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_COND // @@ -7171,9 +7174,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_R_COND // @@ -7196,9 +7199,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, 
INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // R_R_FLAGS_COND // @@ -7282,9 +7285,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Branch to register // @@ -7295,9 +7298,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8); theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL // // Misc // @@ -7323,9 +7326,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST); theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_GENERAL -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD //////////////////////////////////////////////////////////////////////////////// // // SIMD and Floating point @@ -7499,9 +7502,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX); theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R mov and aliases for mov // @@ -7587,9 +7590,9 @@ void CodeGen::genArm64EmitterUnitTests() 
theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V8, REG_V9, REG_V10, 11, INS_OPTS_16B); theEmitter->emitIns_R_R_R_I(INS_ext, EA_16BYTE, REG_V12, REG_V13, REG_V14, 15, INS_OPTS_16B); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_I movi and mvni // @@ -7662,9 +7665,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8 theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16 -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_I orr/bic vector immediate // @@ -7699,9 +7702,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16 theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24 -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_F cmp/fmov immediate // @@ -7740,9 +7743,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0); theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R cmeq/fmov/fcmp/fcvt // @@ -7784,9 +7787,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H); theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS 
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point conversions // @@ -7981,9 +7984,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point operations, one dest, one source // @@ -8106,7 +8109,7 @@ void CodeGen::genArm64EmitterUnitTests() #endif -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD genDefineTempLabel(genCreateTempLabel()); // abs scalar @@ -8391,9 +8394,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_xtn2, EA_16BYTE, REG_V4, REG_V10, INS_OPTS_8H); theEmitter->emitIns_R_R(INS_xtn2, EA_16BYTE, REG_V5, REG_V11, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R floating point round to int, one dest, one source // @@ -8461,9 +8464,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S); theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R floating point operations, one dest, two source // @@ -8589,9 +8592,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef 
ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_I vector operations, one dest, one source reg, one immed // @@ -9026,9 +9029,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V20, REG_V21, 17, INS_OPTS_4S); theEmitter->emitIns_R_R_I(INS_uqshrn2, EA_16BYTE, REG_V22, REG_V23, 32, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R vector operations, one dest, two source // @@ -9140,9 +9143,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // sdot vector theEmitter->emitIns_R_R_R(INS_sdot, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_sdot, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4S); @@ -9288,9 +9291,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_fcmgt, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_fcmgt, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // trn1 vector theEmitter->emitIns_R_R_R(INS_trn1, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_trn1, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B); @@ -9344,9 +9347,9 @@ void 
CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_zip2, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S); theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); theEmitter->emitIns_R_R_R(INS_zip2, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // srshl scalar theEmitter->emitIns_R_R_R(INS_srshl, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_NONE); @@ -9794,9 +9797,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B); theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H); theEmitter->emitIns_R_R_R(INS_usubw2, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R vector multiply // @@ -9854,9 +9857,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // pmull vector theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B); theEmitter->emitIns_R_R_R(INS_pmull, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_1D); @@ -10056,9 +10059,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_umull2, EA_16BYTE, REG_V6, REG_V7, REG_V8, 7, INS_OPTS_8H); theEmitter->emitIns_R_R_R_I(INS_umull2, 
EA_16BYTE, REG_V9, REG_V10, REG_V11, 3, INS_OPTS_4S); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R floating point operations, one source/dest, and two source // @@ -10085,9 +10088,9 @@ void CodeGen::genArm64EmitterUnitTests() theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S); theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D); -#endif // ALL_ARM64_EMITTER_UNIT_TESTS +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD -#ifdef ALL_ARM64_EMITTER_UNIT_TESTS +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_ADVSIMD // // R_R_R_R floating point operations, one dest, and three source // @@ -10104,6 +10107,238 @@ void CodeGen::genArm64EmitterUnitTests() #endif +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE + // + // R_R_R SVE operations, one dest, two source + // + + genDefineTempLabel(genCreateTempLabel()); + + // IF_SVE_AA_3A + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_P1, REG_V2, + INS_OPTS_SCALABLE_B); // AND <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_P4, REG_V5, + INS_OPTS_SCALABLE_H); // BIC <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V14, REG_P5, REG_V16, + INS_OPTS_SCALABLE_S); // EOR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V29, REG_P7, REG_V31, + INS_OPTS_SCALABLE_D); // ORR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + + // IF_SVE_AB_3A + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V5, REG_P6, REG_V7, + INS_OPTS_SCALABLE_B); // ADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V15, REG_P7, REG_V29, + INS_OPTS_SCALABLE_H); // SUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_subr, EA_SCALABLE, REG_V2, REG_P0, REG_V13, + INS_OPTS_SCALABLE_S); // SUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> 
+ + // IF_SVE_AC_3A + theEmitter->emitIns_R_R_R(INS_sve_sdiv, EA_SCALABLE, REG_V3, REG_P2, REG_V9, + INS_OPTS_SCALABLE_S); // SDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sdivr, EA_SCALABLE, REG_V31, REG_P3, REG_V29, + INS_OPTS_SCALABLE_D); // SDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_udiv, EA_SCALABLE, REG_V1, REG_P0, REG_V0, + INS_OPTS_SCALABLE_S); // UDIV <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_udivr, EA_SCALABLE, REG_V13, REG_P7, REG_V15, + INS_OPTS_SCALABLE_D); // UDIVR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + + // IF_SVE_AD_3A + theEmitter->emitIns_R_R_R(INS_sve_smax, EA_SCALABLE, REG_V24, REG_P0, REG_V2, + INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_smin, EA_SCALABLE, REG_V9, REG_P1, REG_V27, + INS_OPTS_SCALABLE_H); // SMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_sabd, EA_SCALABLE, REG_V5, REG_P2, REG_V6, + INS_OPTS_SCALABLE_B); // SABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_uabd, EA_SCALABLE, REG_V23, REG_P3, REG_V9, + INS_OPTS_SCALABLE_S); // UABD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_umax, EA_SCALABLE, REG_V15, REG_P4, REG_V2, + INS_OPTS_SCALABLE_S); // UMAX <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R(INS_sve_umin, EA_SCALABLE, REG_V12, REG_P7, REG_V0, + INS_OPTS_SCALABLE_D); // UMIN <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + + // IF_SVE_AE_3A + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_P1, REG_V3, + INS_OPTS_SCALABLE_D); /* MUL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V17, REG_P5, REG_V5, + INS_OPTS_SCALABLE_S); /* SMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V12, REG_P2, REG_V24, + INS_OPTS_SCALABLE_B); /* UMULH <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_AN_3A + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V5, REG_P0, REG_V21, + INS_OPTS_SCALABLE_S); /* ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_asrr, EA_SCALABLE, REG_V1, REG_P7, REG_V20, + INS_OPTS_SCALABLE_B); /* ASRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> 
*/ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_P2, REG_V0, + INS_OPTS_SCALABLE_H); /* LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_lslr, EA_SCALABLE, REG_V27, REG_P6, REG_V31, + INS_OPTS_SCALABLE_D); /* LSLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V5, REG_P5, REG_V6, + INS_OPTS_SCALABLE_B); /* LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_lsrr, EA_SCALABLE, REG_V15, REG_P4, REG_V17, + INS_OPTS_SCALABLE_S); /* LSRR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_AO_3A + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V4, REG_P3, REG_V24, + INS_OPTS_SCALABLE_WIDE_B); /* ASR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_P7, REG_V3, + INS_OPTS_SCALABLE_WIDE_H); /* LSL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_P0, REG_V0, + INS_OPTS_SCALABLE_WIDE_S); /* LSR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.D */ + + // IF_SVE_CM_3A + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_SCALABLE, REG_V31, REG_P7, REG_V31, + INS_OPTS_SCALABLE_B); /* CLASTA <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_SCALABLE, REG_V30, REG_P6, REG_V30, + INS_OPTS_SCALABLE_D); /* CLASTB <Zdn>.<T>, <Pg>, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_CN_3A + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_2BYTE, REG_V12, REG_P1, REG_V15, + INS_OPTS_SCALABLE_H_TO_SIMD); /* CLASTA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_V13, REG_P2, REG_V16, + INS_OPTS_SCALABLE_S_TO_SIMD); /* CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_V14, REG_P0, REG_V17, + INS_OPTS_SCALABLE_D_TO_SIMD); /* CLASTB <V><dn>, <Pg>, <V><dn>, <Zm>.<T> */ + + // IF_SVE_CO_3A + // Note: EA_4BYTE used for B and H (destination register is W) + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R0, REG_P0, REG_V0, + INS_OPTS_SCALABLE_B_TO_SCALAR); /* CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clasta, EA_4BYTE, REG_R1, REG_P2, REG_V3, + INS_OPTS_SCALABLE_H_TO_SCALAR); /* CLASTA <R><dn>, <Pg>, <R><dn>, <Zm>.<T> 
*/ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_4BYTE, REG_R23, REG_P5, REG_V12, + INS_OPTS_SCALABLE_S_TO_SCALAR); /* CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_clastb, EA_8BYTE, REG_R3, REG_P6, REG_V9, + INS_OPTS_SCALABLE_D_TO_SCALAR); /* CLASTB <R><dn>, <Pg>, <R><dn>, <Zm>.<T> */ + + // IF_SVE_EP_3A + theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, + INS_OPTS_SCALABLE_B); /* SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_shsub, EA_SCALABLE, REG_V16, REG_P1, REG_V11, + INS_OPTS_SCALABLE_H); /* SHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_shsubr, EA_SCALABLE, REG_V17, REG_P2, REG_V12, + INS_OPTS_SCALABLE_S); /* SHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_srhadd, EA_SCALABLE, REG_V18, REG_P3, REG_V13, + INS_OPTS_SCALABLE_D); /* SRHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uhadd, EA_SCALABLE, REG_V19, REG_P4, REG_V14, + INS_OPTS_SCALABLE_B); /* UHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uhsub, EA_SCALABLE, REG_V20, REG_P5, REG_V15, + INS_OPTS_SCALABLE_H); /* UHSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uhsubr, EA_SCALABLE, REG_V21, REG_P6, REG_V16, + INS_OPTS_SCALABLE_S); /* UHSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_urhadd, EA_SCALABLE, REG_V22, REG_P7, REG_V17, + INS_OPTS_SCALABLE_D); /* URHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_ER_3A + theEmitter->emitIns_R_R_R(INS_sve_addp, EA_SCALABLE, REG_V23, REG_P6, REG_V18, + INS_OPTS_SCALABLE_B); /* ADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_smaxp, EA_SCALABLE, REG_V24, REG_P5, REG_V19, + INS_OPTS_SCALABLE_H); /* SMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sminp, EA_SCALABLE, REG_V25, REG_P4, REG_V20, + INS_OPTS_SCALABLE_S); /* SMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_umaxp, EA_SCALABLE, REG_V26, REG_P3, REG_V21, + INS_OPTS_SCALABLE_D); /* UMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uminp, EA_SCALABLE, REG_V27, REG_P2, REG_V22, + INS_OPTS_SCALABLE_B); /* UMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_ET_3A + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V28, REG_P1, REG_V23, + INS_OPTS_SCALABLE_B); /* SQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V29, REG_P0, REG_V24, + INS_OPTS_SCALABLE_H); /* SQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sqsubr, EA_SCALABLE, REG_V30, REG_P1, REG_V25, + INS_OPTS_SCALABLE_H); /* SQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_suqadd, EA_SCALABLE, REG_V31, REG_P2, REG_V26, + INS_OPTS_SCALABLE_B); /* SUQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V0, REG_P3, REG_V27, + INS_OPTS_SCALABLE_S); /* UQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V1, REG_P4, REG_V28, + INS_OPTS_SCALABLE_D); /* UQSUB <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqsubr, EA_SCALABLE, REG_V2, REG_P5, REG_V29, + INS_OPTS_SCALABLE_B); /* UQSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_usqadd, EA_SCALABLE, REG_V3, REG_P6, REG_V30, + INS_OPTS_SCALABLE_B); /* USQADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_EU_3A + theEmitter->emitIns_R_R_R(INS_sve_sqrshl, EA_SCALABLE, REG_V4, REG_P7, REG_V31, + INS_OPTS_SCALABLE_B); /* SQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sqrshlr, EA_SCALABLE, REG_V5, REG_P0, REG_V30, + INS_OPTS_SCALABLE_H); /* SQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sqshl, EA_SCALABLE, REG_V6, REG_P1, REG_V29, + INS_OPTS_SCALABLE_S); /* SQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_sqshlr, EA_SCALABLE, REG_V7, REG_P2, REG_V28, + INS_OPTS_SCALABLE_D); /* SQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_srshl, EA_SCALABLE, REG_V8, REG_P3, REG_V27, + INS_OPTS_SCALABLE_B); /* SRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_srshlr, EA_SCALABLE, REG_V9, REG_P4, REG_V26, + INS_OPTS_SCALABLE_H); /* SRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> 
*/ + theEmitter->emitIns_R_R_R(INS_sve_uqrshl, EA_SCALABLE, REG_V10, REG_P5, REG_V25, + INS_OPTS_SCALABLE_S); /* UQRSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqrshlr, EA_SCALABLE, REG_V11, REG_P6, REG_V24, + INS_OPTS_SCALABLE_D); /* UQRSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqshl, EA_SCALABLE, REG_V12, REG_P7, REG_V23, + INS_OPTS_SCALABLE_B); /* UQSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_uqshlr, EA_SCALABLE, REG_V13, REG_P0, REG_V22, + INS_OPTS_SCALABLE_H); /* UQSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_urshl, EA_SCALABLE, REG_V14, REG_P1, REG_V21, + INS_OPTS_SCALABLE_S); /* URSHL <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, + INS_OPTS_SCALABLE_D); /* URSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_GR_3A + theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, + INS_OPTS_SCALABLE_H); /* FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnmp, EA_SCALABLE, REG_V17, REG_P4, REG_V18, + INS_OPTS_SCALABLE_S); /* FMAXNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxp, EA_SCALABLE, REG_V18, REG_P5, REG_V17, + INS_OPTS_SCALABLE_D); /* FMAXP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fminnmp, EA_SCALABLE, REG_V19, REG_P6, REG_V16, + INS_OPTS_SCALABLE_S); /* FMINNMP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fminp, EA_SCALABLE, REG_V20, REG_P7, REG_V15, + INS_OPTS_SCALABLE_H); /* FMINP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> */ + + // IF_SVE_HJ_3A + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_2BYTE, REG_V21, REG_P6, REG_V14, + INS_OPTS_SCALABLE_H_TO_SIMD); /* FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_4BYTE, REG_V22, REG_P5, REG_V13, + INS_OPTS_SCALABLE_S_TO_SIMD); /* FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> */ + theEmitter->emitIns_R_R_R(INS_sve_fadda, EA_8BYTE, REG_V23, REG_P4, REG_V12, + INS_OPTS_SCALABLE_D_TO_SIMD); /* FADDA <V><dn>, <Pg>, <V><dn>, <Zm>.<T> 
*/ + // IF_SVE_HL_3A + theEmitter->emitIns_R_R_R(INS_sve_fabd, EA_SCALABLE, REG_V24, REG_P3, REG_V11, + INS_OPTS_SCALABLE_H); /* FABD ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V25, REG_P2, REG_V10, + INS_OPTS_SCALABLE_S); /* FADD ., /M, ., . */ + // These are not yet supported by capstone. + // theEmitter->emitIns_R_R_R(INS_sve_famax, EA_SCALABLE, REG_V26, REG_P1, REG_V9, INS_OPTS_SCALABLE_D); + /* FAMAX ., /M, ., . */ + // theEmitter->emitIns_R_R_R(INS_sve_famin, EA_SCALABLE, REG_V27, REG_P0, REG_V8, INS_OPTS_SCALABLE_H); + /* FAMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fdiv, EA_SCALABLE, REG_V28, REG_P0, REG_V7, + INS_OPTS_SCALABLE_S); /* FDIV ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fdivr, EA_SCALABLE, REG_V29, REG_P1, REG_V6, + INS_OPTS_SCALABLE_D); /* FDIVR ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmax, EA_SCALABLE, REG_V30, REG_P2, REG_V5, + INS_OPTS_SCALABLE_H); /* FMAX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmaxnm, EA_SCALABLE, REG_V31, REG_P3, REG_V4, + INS_OPTS_SCALABLE_S); /* FMAXNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmin, EA_SCALABLE, REG_V0, REG_P4, REG_V3, + INS_OPTS_SCALABLE_D); /* FMIN ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fminnm, EA_SCALABLE, REG_V1, REG_P5, REG_V2, + INS_OPTS_SCALABLE_H); /* FMINNM ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V2, REG_P6, REG_V1, + INS_OPTS_SCALABLE_S); /* FMUL ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fmulx, EA_SCALABLE, REG_V3, REG_P7, REG_V0, + INS_OPTS_SCALABLE_D); /* FMULX ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fscale, EA_SCALABLE, REG_V4, REG_P6, REG_V31, + INS_OPTS_SCALABLE_H); /* FSCALE ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V5, REG_P5, REG_V30, + INS_OPTS_SCALABLE_S); /* FSUB ., /M, ., . */ + theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, + INS_OPTS_SCALABLE_D); /* FSUBR ., /M, ., . 
*/ + +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE + #ifdef ALL_ARM64_EMITTER_UNIT_TESTS BasicBlock* label = genCreateTempLabel(); diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index b18451d9d1fe8..49875c459927e 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2730,6 +2730,8 @@ const emitAttr emitter::emitSizeDecode[emitter::OPSZ_COUNT] = { EA_1BYTE, EA_2BYTE, EA_4BYTE, EA_8BYTE, EA_16BYTE, #if defined(TARGET_XARCH) EA_32BYTE, EA_64BYTE, +#elif defined(TARGET_ARM64) + EA_SCALABLE, #endif // TARGET_XARCH }; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 5471cc0bc0467..26c83cd513f3b 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -505,6 +505,9 @@ class emitter OPSZ32 = 5, OPSZ64 = 6, OPSZ_COUNT = 7, +#elif defined(TARGET_ARM64) + OPSZ_SCALABLE = 5, + OPSZ_COUNT = 6, #else OPSZ_COUNT = 5, #endif @@ -512,7 +515,7 @@ class emitter #ifdef TARGET_AMD64 OPSZP = OPSZ8, #else - OPSZP = OPSZ4, + OPSZP = OPSZ4, #endif }; @@ -1708,7 +1711,9 @@ class emitter #define PERFSCORE_THROUGHPUT_8C 8.0f // slower - 8 cycles #define PERFSCORE_THROUGHPUT_9C 9.0f // slower - 9 cycles #define PERFSCORE_THROUGHPUT_10C 10.0f // slower - 10 cycles +#define PERFSCORE_THROUGHPUT_11C 11.0f // slower - 11 cycles #define PERFSCORE_THROUGHPUT_13C 13.0f // slower - 13 cycles +#define PERFSCORE_THROUGHPUT_14C 14.0f // slower - 14 cycles #define PERFSCORE_THROUGHPUT_19C 19.0f // slower - 19 cycles #define PERFSCORE_THROUGHPUT_25C 25.0f // slower - 25 cycles #define PERFSCORE_THROUGHPUT_33C 33.0f // slower - 33 cycles diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5705f79c79b4d..c4b8eaa50224c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -943,6 +943,87 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(datasize == EA_8BYTE); break; + // Scalable. + case IF_SVE_AA_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableSimple(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + // Scalable, .S or .D. + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + // Scalable Wide. + case IF_SVE_AO_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + // Scalable to Simd. + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + elemsize = id->idOpSize(); + assert(insOptsScalableToSimd(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsize(elemsize)); + break; + + // Scalable to FP Simd. + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableToSimdFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidVectorElemsizeSveFloat(elemsize)); + break; + + // Scalable to general register. + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + elemsize = id->idOpSize(); + assert(insOptsScalableToScalar(id->idInsOpt())); // xx + assert(isGeneralRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isValidScalarDatasize(elemsize)); + break; + + // Scalable FP. + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + elemsize = id->idOpSize(); + assert(insOptsScalableFloat(id->idInsOpt())); // xx + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -1477,7 +1558,7 @@ const char* emitter::emitPredicateRegName(regNumber reg) int index = (int)reg - (int)REG_P0; - return vRegNames[index]; + return pRegNames[index]; } /***************************************************************************** @@ -8048,6 +8129,216 @@ void emitter::emitIns_R_R_R( fmt = IF_DV_3A; break; + case INS_sve_and: + case INS_sve_bic: + case INS_sve_eor: + case INS_sve_orr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AA_3A; + break; + + case INS_sve_add: + case INS_sve_sub: + case INS_sve_subr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AB_3A; + break; + + case INS_sve_sdiv: + case INS_sve_sdivr: + case INS_sve_udiv: + case INS_sve_udivr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableWords(opt)); + fmt = IF_SVE_AC_3A; + break; + + case INS_sve_sabd: + case INS_sve_smax: + case INS_sve_smin: + case INS_sve_uabd: + case INS_sve_umax: + case INS_sve_umin: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AD_3A; + break; + + case INS_sve_mul: + case INS_sve_smulh: + case INS_sve_umulh: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + 
assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AE_3A; + break; + + case INS_sve_asrr: + case INS_sve_lslr: + case INS_sve_lsrr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_AN_3A; + break; + + case INS_sve_asr: + case INS_sve_lsl: + case INS_sve_lsr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsScalableSimple(opt)) + { + fmt = IF_SVE_AN_3A; + } + else + { + assert(insOptsScalableWide(opt)); + fmt = IF_SVE_AO_3A; + } + break; + + case INS_sve_clasta: + case INS_sve_clastb: + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + if (insOptsScalableSimple(opt)) + { + assert(isVectorRegister(reg1)); + fmt = IF_SVE_CM_3A; + } + else if (insOptsScalableToSimd(opt)) + { + assert(isFloatReg(reg1)); + assert(isValidVectorElemsize(size)); + fmt = IF_SVE_CN_3A; + } + else + { + assert(insOptsScalableToScalar(opt)); + assert(isGeneralRegister(reg1)); + assert(isValidScalarDatasize(size)); + fmt = IF_SVE_CO_3A; + } + break; + + case INS_sve_shadd: + case INS_sve_shsub: + case INS_sve_shsubr: + case INS_sve_srhadd: + case INS_sve_uhadd: + case INS_sve_uhsub: + case INS_sve_uhsubr: + case INS_sve_urhadd: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_EP_3A; + break; + + case INS_sve_addp: + case INS_sve_smaxp: + case INS_sve_sminp: + case INS_sve_umaxp: + case INS_sve_uminp: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_ER_3A; + break; + + case INS_sve_sqadd: + case INS_sve_sqsub: + case INS_sve_sqsubr: + case INS_sve_suqadd: + case INS_sve_uqadd: + case INS_sve_uqsub: + case INS_sve_uqsubr: + case 
INS_sve_usqadd: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_ET_3A; + break; + + case INS_sve_sqrshl: + case INS_sve_sqrshlr: + case INS_sve_sqshl: + case INS_sve_sqshlr: + case INS_sve_srshl: + case INS_sve_srshlr: + case INS_sve_uqrshl: + case INS_sve_uqrshlr: + case INS_sve_uqshl: + case INS_sve_uqshlr: + case INS_sve_urshl: + case INS_sve_urshlr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableSimple(opt)); + fmt = IF_SVE_EU_3A; + break; + + case INS_sve_faddp: + case INS_sve_fmaxnmp: + case INS_sve_fmaxp: + case INS_sve_fminnmp: + case INS_sve_fminp: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_GR_3A; + break; + + case INS_sve_fadda: + assert(isFloatReg(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableToSimdFloat(opt)); + assert(isValidVectorElemsizeSveFloat(size)); + fmt = IF_SVE_HJ_3A; + break; + + case INS_sve_fabd: + case INS_sve_fadd: + case INS_sve_famax: + case INS_sve_famin: + case INS_sve_fdiv: + case INS_sve_fdivr: + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + case INS_sve_fmul: + case INS_sve_fmulx: + case INS_sve_fscale: + case INS_sve_fsub: + case INS_sve_fsubr: + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(insOptsScalableFloat(opt)); + fmt = IF_SVE_HL_3A; + break; + default: unreached(); break; @@ -10665,7 +10956,7 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_12_to_10(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - 
(emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 10; @@ -10795,7 +11086,7 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_7_to_5(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 5; @@ -10808,7 +11099,7 @@ void emitter::emitIns_Call(EmitCallType callType, /*static*/ emitter::code_t emitter::insEncodeReg_P_3_to_1(regNumber reg) { - assert(isPredicateRegister(reg)); + assert(isLowPredicateRegister(reg)); emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_P0; assert((ureg >= 0) && (ureg <= 15)); return ureg << 1; @@ -11659,6 +11950,44 @@ void emitter::emitIns_Call(EmitCallType callType, } } +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize(insOpts opt) +{ + switch (opt) + { + case INS_OPTS_SCALABLE_B: + case INS_OPTS_SCALABLE_WIDE_B: + case INS_OPTS_SCALABLE_B_TO_SIMD: + case INS_OPTS_SCALABLE_B_TO_SCALAR: + return 0x00000000; + + case INS_OPTS_SCALABLE_H: + case INS_OPTS_SCALABLE_WIDE_H: + case INS_OPTS_SCALABLE_H_TO_SIMD: + case INS_OPTS_SCALABLE_H_TO_SCALAR: + return 0x00400000; // set the bit at location 22 + + case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_WIDE_S: + case INS_OPTS_SCALABLE_S_TO_SIMD: + case INS_OPTS_SCALABLE_S_TO_SCALAR: + return 0x00800000; // set the bit at location 23 + + case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_D_TO_SIMD: + case INS_OPTS_SCALABLE_D_TO_SCALAR: + return 0x00C00000; // set the bit at location 23 and 22 + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, 
instrDescJmp* id) { instruction ins = id->idIns(); @@ -13606,6 +13935,42 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + // Scalable. + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + case IF_SVE_HL_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx + dst += emitOutput_Instr(dst, code); + break; + + // Scalable to general register. + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveElemsize(id->idInsOpt()); // xx + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -14161,16 +14526,34 @@ void emitter::emitDispVectorElemList( //------------------------------------------------------------------------ // emitDispPredicateReg: Display a predicate register name with with an arrangement suffix // -void emitter::emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma) +void emitter::emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addComma) { assert(isPredicateRegister(reg)); printf(emitPredicateRegName(reg)); - emitDispArrangement(opt); + + if (ptype == PREDICATE_MERGE) + { + printf("/m"); + } + else if (ptype == PREDICATE_ZERO) + { + printf("/z"); + } if (addComma) emitDispComma(); } +//------------------------------------------------------------------------ +// emitDispLowPredicateReg: Display a low predicate register name with an optional /m or /z predicate type suffix +// +void emitter::emitDispLowPredicateReg(regNumber reg, PredicateType ptype, bool addComma) +{ + assert(isLowPredicateRegister(reg)); + reg = (regNumber)((((unsigned)reg - REG_PREDICATE_FIRST) & 0x7) + REG_PREDICATE_FIRST); + emitDispPredicateReg(reg, ptype, addComma); +} + 
//------------------------------------------------------------------------ // emitDispArrangement: Display a SIMD vector arrangement suffix // @@ -14187,6 +14570,9 @@ void emitter::emitDispArrangement(insOpts opt) str = "16b"; break; case INS_OPTS_SCALABLE_B: + case INS_OPTS_SCALABLE_WIDE_B: + case INS_OPTS_SCALABLE_B_TO_SIMD: + case INS_OPTS_SCALABLE_B_TO_SCALAR: str = "b"; break; case INS_OPTS_4H: @@ -14196,6 +14582,9 @@ void emitter::emitDispArrangement(insOpts opt) str = "8h"; break; case INS_OPTS_SCALABLE_H: + case INS_OPTS_SCALABLE_WIDE_H: + case INS_OPTS_SCALABLE_H_TO_SIMD: + case INS_OPTS_SCALABLE_H_TO_SCALAR: str = "h"; break; case INS_OPTS_2S: @@ -14205,6 +14594,9 @@ void emitter::emitDispArrangement(insOpts opt) str = "4s"; break; case INS_OPTS_SCALABLE_S: + case INS_OPTS_SCALABLE_WIDE_S: + case INS_OPTS_SCALABLE_S_TO_SIMD: + case INS_OPTS_SCALABLE_S_TO_SCALAR: str = "s"; break; case INS_OPTS_1D: @@ -14214,6 +14606,8 @@ void emitter::emitDispArrangement(insOpts opt) str = "2d"; break; case INS_OPTS_SCALABLE_D: + case INS_OPTS_SCALABLE_D_TO_SIMD: + case INS_OPTS_SCALABLE_D_TO_SCALAR: str = "d"; break; @@ -15778,6 +16172,52 @@ void emitter::emitDispInsHelp( } break; + // Scalable. + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + case IF_SVE_AN_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + case IF_SVE_ER_3A: // ........xx...... 
...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // Scalable. Reg3 has elements of size 8 bytes. + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), INS_OPTS_SCALABLE_D, false); // mmmmm + break; + + // Scalable. No predicate type. + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + + // Scalable to general register or SIMD. No predicate type. + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + case IF_SVE_HJ_3A: // ........xx...... 
...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispLowPredicateReg(id->idReg2(), PREDICATE_NONE, true); // ggg + emitDispReg(id->idReg1(), size, true); // ddddd + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmmmm + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -17963,6 +18403,128 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins } break; + // SVE latencies from Arm Neoverse N2 Software Optimization Guide, Issue 5.0, Revision: r0p3 + + // Predicate logical + case IF_SVE_AA_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise logical operations (predicated) + result.insLatency = PERFSCORE_LATENCY_1C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, basic + case IF_SVE_AB_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer add/subtract vectors (predicated) + case IF_SVE_EP_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer halving add/subtract (predicated) + // Max/min, basic and pairwise + case IF_SVE_AD_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer min/max/difference (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Divides, 32 bit (Note: worse for 64 bit) + case IF_SVE_AC_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer divide vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_12C; // 7 to 12 + result.insThroughput = PERFSCORE_THROUGHPUT_11C; // 1/11 to 1/7 + break; + + // Multiply, B, H, S element size (Note: D element size is slightly slower) + case IF_SVE_AE_3A: // ........xx...... ...gggmmmmmddddd -- SVE integer multiply vectors (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, shift + case IF_SVE_AN_3A: // ........xx...... 
...gggmmmmmddddd -- SVE bitwise shift by vector (predicated) + case IF_SVE_AO_3A: // ........xx...... ...gggmmmmmddddd -- SVE bitwise shift by wide elements (predicated) + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Conditional extract operations, SIMD&FP scalar and vector forms + case IF_SVE_CM_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally broadcast element to vector + case IF_SVE_CN_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to SIMD&FP scalar + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Conditional extract operations, scalar form + case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register + result.insLatency = PERFSCORE_LATENCY_8C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Arithmetic, pairwise add + // Max/min, basic and pairwise + case IF_SVE_ER_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 integer pairwise arithmetic + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Arithmetic, complex + case IF_SVE_ET_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating add/subtract + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + break; + + // Arithmetic, shift complex + case IF_SVE_EU_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 saturating/rounding bitwise shift left + // (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + // Floating point arithmetic + // Floating point min/max pairwise + case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point associative add, F64. 
(Note: Worse for F32 and F16) + case IF_SVE_HJ_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point serial reduction (predicated) + result.insLatency = PERFSCORE_LATENCY_4C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) + switch (ins) + { + // Floating point absolute value/difference + case INS_sve_fabd: + // Floating point min/max + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + // Floating point arithmetic + case INS_sve_fadd: + case INS_sve_fsub: + case INS_sve_fsubr: + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + // Floating point divide, F64 (Note: Worse for F32, F16) + case INS_sve_fdiv: + case INS_sve_fdivr: + result.insLatency = PERFSCORE_LATENCY_15C; // 7 to 15 + result.insThroughput = PERFSCORE_THROUGHPUT_14C; // 1/14 to 1/7 + break; + + // Floating point multiply + case INS_sve_fmul: + case INS_sve_fmulx: + case INS_sve_fscale: + result.insLatency = PERFSCORE_LATENCY_3C; + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 35ea1caab9e3a..94ac7336813a2 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -19,6 +19,13 @@ static bool strictArmAsm; /* Debug-only routines to display instructions */ /************************************************************************/ +enum PredicateType +{ + PREDICATE_NONE = 0, + PREDICATE_MERGE, + PREDICATE_ZERO, +}; + const char* emitSveRegName(regNumber reg); const char* emitVectorRegName(regNumber reg); const char* emitPredicateRegName(regNumber reg); @@ -45,7 +52,8 @@ void 
emitDispVectorReg(regNumber reg, insOpts opt, bool addComma); void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma); void emitDispVectorRegList(regNumber firstReg, unsigned listSize, insOpts opt, bool addComma); void emitDispVectorElemList(regNumber firstReg, unsigned listSize, emitAttr elemsize, unsigned index, bool addComma); -void emitDispPredicateReg(regNumber reg, insOpts opt, bool addComma); +void emitDispPredicateReg(regNumber reg, PredicateType ptype, bool addComma); +void emitDispLowPredicateReg(regNumber reg, PredicateType ptype, bool addComma); void emitDispArrangement(insOpts opt); void emitDispElemsize(emitAttr elemsize); void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr); @@ -452,6 +460,9 @@ static code_t insEncodeExtendScale(ssize_t imm); // Returns the encoding to have the Rm register be auto scaled by the ld/st size static code_t insEncodeReg3Scale(bool isScaled); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction +static code_t insEncodeSveElemsize(insOpts opt); + // Returns true if 'reg' represents an integer register. 
static bool isIntegerRegister(regNumber reg) { @@ -702,6 +713,16 @@ inline static bool isValidVectorElemsizeFloat(emitAttr size) return (size == EA_8BYTE) || (size == EA_4BYTE); } +inline static bool isValidVectorElemsizeSveFloat(emitAttr size) +{ + return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE); +} + +inline static bool isScalableVectorSize(emitAttr size) +{ + return (size == EA_SCALABLE); +} + inline static bool isGeneralRegister(regNumber reg) { return (reg >= REG_INT_FIRST) && (reg <= REG_LR); @@ -732,6 +753,11 @@ inline static bool isPredicateRegister(regNumber reg) return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LAST); } +inline static bool isLowPredicateRegister(regNumber reg) +{ + return (reg >= REG_PREDICATE_FIRST && reg <= REG_PREDICATE_LOW_LAST); +} + inline static bool insOptsNone(insOpts opt) { return (opt == INS_OPTS_NONE); @@ -830,8 +856,56 @@ inline static bool insOptsConvertIntToFloat(insOpts opt) inline static bool insOptsScalable(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_B || opt == INS_OPTS_SCALABLE_H || opt == INS_OPTS_SCALABLE_S || - opt == INS_OPTS_SCALABLE_D)); + // `opt` is any of the scalable types. + return ((insOptsScalableSimple(opt)) || (insOptsScalableWide(opt)) || (insOptsScalableToSimd(opt)) || + (insOptsScalableToScalar(opt))); +} + +inline static bool insOptsScalableSimple(insOpts opt) +{ + // `opt` is any of the standard scalable types. + return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || + (opt == INS_OPTS_SCALABLE_D)); +} + +inline static bool insOptsScalableWords(insOpts opt) +{ + // `opt` is any of the standard scalable types of word size or larger. + return ((opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); +} + +inline static bool insOptsScalableFloat(insOpts opt) +{ + // `opt` is any of the standard scalable types that are valid for FP. 
+ return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); +} + +inline static bool insOptsScalableWide(insOpts opt) +{ + // `opt` is any of the scalable types that are valid for widening to size D. + return ((opt == INS_OPTS_SCALABLE_WIDE_B) || (opt == INS_OPTS_SCALABLE_WIDE_H) || + (opt == INS_OPTS_SCALABLE_WIDE_S)); +} + +inline static bool insOptsScalableToSimd(insOpts opt) +{ + // `opt` is any of the scalable types that are valid for conversion to a scalar in a SIMD register. + return ((opt == INS_OPTS_SCALABLE_B_TO_SIMD) || (opt == INS_OPTS_SCALABLE_H_TO_SIMD) || + (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); +} + +inline static bool insOptsScalableToSimdFloat(insOpts opt) +{ + // `opt` is any of the scalable types that are valid for conversion to an FP scalar in a SIMD register. + return ((opt == INS_OPTS_SCALABLE_H_TO_SIMD) || (opt == INS_OPTS_SCALABLE_S_TO_SIMD) || + (opt == INS_OPTS_SCALABLE_D_TO_SIMD)); +} + +inline static bool insOptsScalableToScalar(insOpts opt) +{ + // `opt` is any of the SIMD scalable types that are valid for conversion to scalar. 
+ return ((opt == INS_OPTS_SCALABLE_B_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_H_TO_SCALAR) || + (opt == INS_OPTS_SCALABLE_S_TO_SCALAR) || (opt == INS_OPTS_SCALABLE_D_TO_SCALAR)); } static bool isValidImmCond(ssize_t imm); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index a3a70ab92107b..0a5e3c291489b 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -273,6 +273,20 @@ enum insOpts : unsigned INS_OPTS_SCALABLE_S, INS_OPTS_SCALABLE_D, + INS_OPTS_SCALABLE_WIDE_B, + INS_OPTS_SCALABLE_WIDE_H, + INS_OPTS_SCALABLE_WIDE_S, + + INS_OPTS_SCALABLE_B_TO_SIMD, + INS_OPTS_SCALABLE_H_TO_SIMD, + INS_OPTS_SCALABLE_S_TO_SIMD, + INS_OPTS_SCALABLE_D_TO_SIMD, + + INS_OPTS_SCALABLE_B_TO_SCALAR, + INS_OPTS_SCALABLE_H_TO_SCALAR, + INS_OPTS_SCALABLE_S_TO_SCALAR, + INS_OPTS_SCALABLE_D_TO_SCALAR, + INS_OPTS_MSL, // Vector Immediate (shifting ones variant) INS_OPTS_S_TO_4BYTE, // Single to INT32 diff --git a/src/coreclr/jit/targetarm64.h b/src/coreclr/jit/targetarm64.h index 61b3f3245ee22..961862e5184d7 100644 --- a/src/coreclr/jit/targetarm64.h +++ b/src/coreclr/jit/targetarm64.h @@ -52,6 +52,7 @@ #define LAST_FP_ARGREG REG_V15 #define REG_PREDICATE_FIRST REG_P0 #define REG_PREDICATE_LAST REG_P15 + #define REG_PREDICATE_LOW_LAST REG_P7 // Some instructions can only use the first half of the predicate registers. #define REGNUM_BITS 6 // number of bits in a REG_* #define REGSIZE_BYTES 8 // number of bytes in one general purpose register