From 30f0bfaa3e5db90ee1f1a0cd3aec212be1b373b6 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 19 Jan 2024 16:49:19 -0500 Subject: [PATCH 1/6] Implement IF_SVE_ED_1A, IF_SVE_EE_1A --- src/coreclr/jit/codegenarm64test.cpp | 28 +++++++++ src/coreclr/jit/emitarm64.cpp | 88 +++++++++++++++++++++++++++- src/coreclr/jit/emitarm64.h | 6 ++ 3 files changed, 119 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 03edaae736690..85fb587634c6b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5597,6 +5597,34 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V4, -0.125, INS_OPTS_SCALABLE_S); // FMOV ., # theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V5, 31.0, INS_OPTS_SCALABLE_D); // FMOV ., # + // IF_SVE_ED_1A + theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, + INS_OPTS_SCALABLE_B); // SMAX ., ., # + theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V1, 127, + INS_OPTS_SCALABLE_H); // SMAX ., ., # + theEmitter->emitIns_R_I(INS_sve_smin, EA_SCALABLE, REG_V2, -128, + INS_OPTS_SCALABLE_S); // SMIN ., ., # + theEmitter->emitIns_R_I(INS_sve_smin, EA_SCALABLE, REG_V3, 127, + INS_OPTS_SCALABLE_D); // SMIN ., ., # + theEmitter->emitIns_R_I(INS_sve_umax, EA_SCALABLE, REG_V4, 0, + INS_OPTS_SCALABLE_B); // UMAX ., ., # + theEmitter->emitIns_R_I(INS_sve_umax, EA_SCALABLE, REG_V5, 255, + INS_OPTS_SCALABLE_H); // UMAX ., ., # + theEmitter->emitIns_R_I(INS_sve_umin, EA_SCALABLE, REG_V6, 0, + INS_OPTS_SCALABLE_S); // UMIN ., ., # + theEmitter->emitIns_R_I(INS_sve_umin, EA_SCALABLE, REG_V7, 255, + INS_OPTS_SCALABLE_D); // UMIN ., ., # + + // IF_SVE_EE_1A + theEmitter->emitIns_R_I(INS_sve_mul, EA_SCALABLE, REG_V0, -128, + INS_OPTS_SCALABLE_B); // MUL ., ., # + theEmitter->emitIns_R_I(INS_sve_mul, EA_SCALABLE, REG_V1, 0, + INS_OPTS_SCALABLE_H); // MUL ., ., # + theEmitter->emitIns_R_I(INS_sve_mul, EA_SCALABLE, REG_V2, 5, + INS_OPTS_SCALABLE_S); // MUL ., ., # + theEmitter->emitIns_R_I(INS_sve_mul, EA_SCALABLE, REG_V3, 127, + INS_OPTS_SCALABLE_D); // MUL ., ., # + // IF_SVE_IH_3A theEmitter->emitIns_R_R_R_I(INS_sve_ld1d, EA_SCALABLE, REG_V5, REG_P3, REG_R4, 0, INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [{, #, MUL VL}] diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5c9fb990bb859..07f7a409e2fdd 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1400,6 +1400,20 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm8(emitGetInsSC(id))); // iiiiiiii break; + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm8(emitGetInsSC(id)) || isValidUimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -6007,6 +6021,7 @@ void emitter::emitIns_R_I(instruction ins, emitAttr elemsize = EA_UNKNOWN; insFormat fmt = IF_NONE; bool canEncode = false; + bool signedImm = false; /* Figure out the encoding format of the instruction */ switch (ins) @@ -6235,6 +6250,39 @@ void emitter::emitIns_R_I(instruction ins, } break; + case INS_sve_smax: + case INS_sve_smin: + signedImm = true; + + FALLTHROUGH; + case INS_sve_umax: + case INS_sve_umin: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg)); // ddddd + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + + if (signedImm) + { + assert(isValidSimm8(imm)); // iiiiiiii + } + else + { + assert(isValidUimm8(imm)); // iiiiiiii + } + + fmt = IF_SVE_ED_1A; + canEncode = true; + break; + + case INS_sve_mul: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg)); // ddddd + assert(isValidSimm8(imm)); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_EE_1A; + canEncode = true; + break; + default: unreached(); break; @@ -16804,13 +16852,17 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) break; case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + { code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code |= ((code_t)emitGetInsSC(id) << 5); // iiiiiiii + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); + code |= (imm8 << 5); // iiiiiiii code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx dst += emitOutput_Instr(dst, code); break; - + } case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -19517,6 +19569,18 @@ void emitter::emitDispInsHelp( emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii break; + // SMAX ., ., # + // SMIN ., ., # + // UMAX ., ., # + // UMIN ., ., # + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + // MUL ., ., # + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(emitGetInsSC(id), false); // iiiiiiii + break; + // { .D }, /Z, [{, #, MUL VL}] // Some of these formats may allow changing the element size instead of using 'D' for all instructions. case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus @@ -22234,6 +22298,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) + switch (ins) + { + case INS_sve_umin: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + default: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + break; + + case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 04f8322a06fc9..b4b62d1eb50fb 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -600,6 +600,12 @@ static bool isValidUimm8(ssize_t value) return (0 <= value) && (value <= 0xFFLL); }; +// Returns true if 'value' is a legal signed immediate 8 bit encoding (such as for SMAX, SMIN). +static bool isValidSimm8(ssize_t value) +{ + return (-128 <= value) && (value <= 127); +}; + // Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN). static bool isValidUimm12(ssize_t value) { From 57d1e2c476bdd0ae0bd38e09ca954d1d4aef6c7f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 19 Jan 2024 18:05:15 -0500 Subject: [PATCH 2/6] Implement IF_SVE_EB_1A --- src/coreclr/jit/codegenarm64.cpp | 3 +- src/coreclr/jit/codegenarm64test.cpp | 18 ++++ src/coreclr/jit/emit.h | 10 +++ src/coreclr/jit/emitarm64.cpp | 122 +++++++++++++++++++++++---- src/coreclr/jit/emitarm64.h | 11 +-- src/coreclr/jit/instr.h | 1 + 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 489aa6a744942..9a83060ce1870 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2241,7 +2241,8 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, { if (emitter::emitIns_valid_imm_for_mov(imm, size)) { - GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm, INS_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags)); + GetEmitter()->emitIns_R_I(INS_mov, size, reg, imm, INS_OPTS_NONE, + INS_SCALABLE_OPTS_NONE DEBUGARG(targetHandle) DEBUGARG(gtFlags)); } else { diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 85fb587634c6b..755fd73544a7f 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5597,6 +5597,24 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V4, -0.125, INS_OPTS_SCALABLE_S); // FMOV ., # theEmitter->emitIns_R_F(INS_sve_fmov, EA_SCALABLE, REG_V5, 31.0, INS_OPTS_SCALABLE_D); // FMOV ., # + // IF_SVE_EB_1A + theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V0, -128, + INS_OPTS_SCALABLE_B); // DUP ., #{, } + theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V1, 0, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_SHIFT); // DUP ., #{, } + theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V2, 5, + INS_OPTS_SCALABLE_S); // DUP ., #{, } + theEmitter->emitIns_R_I(INS_sve_dup, EA_SCALABLE, REG_V3, 127, + INS_OPTS_SCALABLE_D); // DUP ., #{, } + theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V4, 0, + INS_OPTS_SCALABLE_B); // MOV ., #{, } + theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V5, -128, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_SHIFT); // MOV ., #{, } + theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V6, 5, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_SHIFT); // MOV ., #{, } + theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V7, 127, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_SHIFT); // MOV ., #{, } + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 51ed6b72a0c52..cac65e0eafcca 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1433,6 +1433,16 @@ class emitter assert(!idIsSmallDsc()); idAddr()->_idRegBit = val ? 1 : 0; } + bool idOptionalShift() const + { + assert(!idIsSmallDsc()); + return (idAddr()->_idRegBit == 1); + } + void idOptionalShift(bool val) + { + assert(!idIsSmallDsc()); + idAddr()->_idRegBit = val ? 1 : 0; + } #endif // TARGET_ARM64 #endif // TARGET_ARMARCH diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 07f7a409e2fdd..db8eb24402589 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1396,8 +1396,17 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) assert(insOptsScalableAtLeastHalf(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd - assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx assert(isValidUimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + // Size specifier must be able to fit left-shifted immediate + assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift()); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx break; case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) @@ -6010,18 +6019,21 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts o * Add an instruction referencing a register and a constant. */ -void emitter::emitIns_R_I(instruction ins, - emitAttr attr, - regNumber reg, - ssize_t imm, - insOpts opt /* = INS_OPTS_NONE */ +void emitter::emitIns_R_I(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */ DEBUGARG(size_t targetHandle /* = 0 */) DEBUGARG(GenTreeFlags gtFlags /* = GTF_EMPTY */)) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; - bool canEncode = false; - bool signedImm = false; + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + bool canEncode = false; + bool signedImm = false; + bool optionalShift = false; + bool hasShift = true; /* Figure out the encoding format of the instruction */ switch (ins) @@ -6283,6 +6295,25 @@ void emitter::emitIns_R_I(instruction ins, canEncode = true; break; + case INS_sve_mov: + case INS_sve_dup: + optionalShift = true; + hasShift = (sopt == INS_SCALABLE_OPTS_SHIFT); + + assert(insOptsScalableStandard(opt)); + // Size specifier must be able to fit left-shifted immediate + assert(!hasShift || insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt) || hasShift); // h + assert(isVectorRegister(reg)); // ddddd + assert(isValidSimm8(imm)); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_EB_1A; + canEncode = true; + + // MOV is an alias for DUP, and is always the preferred disassembly. + ins = INS_sve_mov; + break; + default: unreached(); break; @@ -6292,7 +6323,8 @@ void emitter::emitIns_R_I(instruction ins, assert(canEncode); assert(fmt != IF_NONE); - instrDesc* id = emitNewInstrSC(attr, imm); + // Instructions with optional shifts need larger instrDesc to store state + instrDesc* id = optionalShift ? emitNewInstrCns(attr, imm) : emitNewInstrSC(attr, imm); id->idIns(ins); id->idInsFmt(fmt); @@ -6300,6 +6332,12 @@ void emitter::emitIns_R_I(instruction ins, id->idReg1(reg); + // For instructions with optional shifts (INS_sve_mov, INS_sve_dup, etc.) + if (optionalShift) + { + id->idOptionalShift(hasShift); + } + #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = targetHandle; id->idDebugOnlyInfo()->idFlags = gtFlags; @@ -16856,13 +16894,26 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) { code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); - code |= (imm8 << 5); // iiiiiiii + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); // iiiiiiii + code |= (imm8 << 5); + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + } + + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + { + code = emitInsCodeSve(ins, fmt); + code |= (id->idOptionalShift() ? 0x2000 : 0); // h + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); // iiiiiiii + code |= (imm8 << 5); code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx dst += emitOutput_Instr(dst, code); break; } + case IF_SVE_DU_3A: // ........xx.mmmmm ......nnnnn.DDDD -- SVE pointer conflict compare code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -19569,6 +19620,27 @@ void emitter::emitDispInsHelp( emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii break; + // DUP ., #{, } + // MOV ., #{, } + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + { + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + ssize_t imm = emitGetInsSC(id); // iiiiiiii + if (id->idOptionalShift()) // h + { + // Special case for left-shifted zero + if (imm == 0) + { + printf("#0, LSL #8"); + break; + } + + imm <<= 8; + } + emitDispImm(imm, false); + break; + } + // SMAX ., ., # // SMIN ., ., # // UMAX ., ., # @@ -22304,6 +22376,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_sve_umin: result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; default: result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; @@ -22316,6 +22389,25 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_5C; break; + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + switch (ins) + { + // TODO-SVE: Why are these different? MOV is an alias for DUP + case INS_sve_mov: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case INS_sve_dup: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index b4b62d1eb50fb..d1c43a16789e7 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1094,11 +1094,12 @@ void emitIns_I(instruction ins, emitAttr attr, ssize_t imm); void emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts opt = INS_OPTS_NONE); -void emitIns_R_I(instruction ins, - emitAttr attr, - regNumber reg, - ssize_t imm, - insOpts opt = INS_OPTS_NONE DEBUGARG(size_t targetHandle = 0) +void emitIns_R_I(instruction ins, + emitAttr attr, + regNumber reg, + ssize_t imm, + insOpts opt = INS_OPTS_NONE, + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE DEBUGARG(size_t targetHandle = 0) DEBUGARG(GenTreeFlags gtFlags = GTF_EMPTY)); void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 5fd9dd456d65c..affe9211b27fc 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -316,6 +316,7 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_WITH_PREDICATE_PAIR, // Variants with {., .} predicate pair (eg whilege) INS_SCALABLE_OPTS_VL_2X, // Variants with a vector length specifier of 2x (eg whilege) INS_SCALABLE_OPTS_VL_4X, // Variants with a vector length specifier of 4x (eg whilege) + INS_SCALABLE_OPTS_SHIFT, // Variants with an optional shift operation (eg dup) // Removable once REG_V0 and REG_P0 are distinct INS_SCALABLE_OPTS_UNPREDICATED, // Variants without a predicate (eg add) From ef49578a824aa91fab73846ad7604a73a7c2442d Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 19 Jan 2024 18:52:39 -0500 Subject: [PATCH 3/6] Implement IF_SVE_EC_1A --- src/coreclr/jit/codegenarm64test.cpp | 16 ++++++ src/coreclr/jit/emitarm64.cpp | 83 ++++++++++++++++++---------- src/coreclr/jit/emitarm64.h | 2 +- 3 files changed, 71 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 755fd73544a7f..e1dd95816a666 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5615,6 +5615,22 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_I(INS_sve_mov, EA_SCALABLE, REG_V7, 127, INS_OPTS_SCALABLE_D, INS_SCALABLE_OPTS_SHIFT); // MOV ., #{, } + // IF_SVE_EC_1A + theEmitter->emitIns_R_I(INS_sve_add, EA_SCALABLE, REG_V0, 0, + INS_OPTS_SCALABLE_B); // ADD ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_sqadd, EA_SCALABLE, REG_V1, 0, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_SHIFT); // SQADD ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_sqsub, EA_SCALABLE, REG_V2, 1, + INS_OPTS_SCALABLE_S); // SQSUB ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_sub, EA_SCALABLE, REG_V3, 128, + INS_OPTS_SCALABLE_D); // SUB ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_subr, EA_SCALABLE, REG_V4, 255, + INS_OPTS_SCALABLE_B); // SUBR ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_uqadd, EA_SCALABLE, REG_V5, 5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_SHIFT); // UQADD ., ., #{, } + theEmitter->emitIns_R_I(INS_sve_uqsub, EA_SCALABLE, REG_V6, 255, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_SHIFT); // UQSUB ., ., #{, } + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index db8eb24402589..363916c8a09cc 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1409,6 +1409,15 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx break; + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) + assert(insOptsScalableStandard(id->idInsOpt())); + // Size specifier must be able to fit left-shifted immediate + assert(insOptsScalableAtLeastHalf(id->idInsOpt()) || !id->idOptionalShift()); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidUimm8(emitGetInsSC(id))); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd @@ -6314,6 +6323,27 @@ void emitter::emitIns_R_I(instruction ins, ins = INS_sve_mov; break; + case INS_sve_add: + case INS_sve_sub: + case INS_sve_sqadd: + case INS_sve_sqsub: + case INS_sve_uqadd: + case INS_sve_uqsub: + case INS_sve_subr: + optionalShift = true; + hasShift = (sopt == INS_SCALABLE_OPTS_SHIFT); + + assert(insOptsScalableStandard(opt)); + // Size specifier must be able to fit left-shifted immediate + assert(!hasShift || insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt) || hasShift); // h + assert(isVectorRegister(reg)); // ddddd + assert(isValidUimm8(imm)); // iiiiiiii + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_EC_1A; + canEncode = true; + break; + default: unreached(); break; @@ -6332,7 +6362,7 @@ void emitter::emitIns_R_I(instruction ins, id->idReg1(reg); - // For instructions with optional shifts (INS_sve_mov, INS_sve_dup, etc.) + // For instructions with optional shifts (MOV, DUP, etc.) if (optionalShift) { id->idOptionalShift(hasShift); @@ -16903,6 +16933,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) } case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) { code = emitInsCodeSve(ins, fmt); code |= (id->idOptionalShift() ? 0x2000 : 0); // h @@ -17350,18 +17381,18 @@ void emitter::emitDispFloatImm(ssize_t imm8) /***************************************************************************** * - * Display an immediate that is optionally LSL12. + * Display an immediate with an optional left-shift. */ -void emitter::emitDispImmOptsLSL12(ssize_t imm, insOpts opt) +void emitter::emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount) { - if (!strictArmAsm && insOptsLSL12(opt)) + if (!strictArmAsm && hasShift) { - imm <<= 12; + imm <<= shiftAmount; } emitDispImm(imm, false); - if (strictArmAsm && insOptsLSL12(opt)) + if (strictArmAsm && hasShift) { - printf(", LSL #12"); + printf(", LSL #%u", shiftAmount); } } @@ -18487,7 +18518,7 @@ void emitter::emitDispInsHelp( case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh) emitDispReg(id->idReg1(), size, true); - emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt()); + emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); emitDispCommentForHandle(0, id->idDebugOnlyInfo()->idMemCookie, id->idDebugOnlyInfo()->idFlags); break; @@ -18544,7 +18575,7 @@ void emitter::emitDispInsHelp( } else { - emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt()); + emitDispImmOptsLSL(emitGetInsSC(id), insOptsLSL12(id->idInsOpt()), 12); } break; @@ -19620,26 +19651,23 @@ void emitter::emitDispInsHelp( emitDispFloatImm(emitGetInsSC(id)); // iiiiiiii break; + // ADD ., ., #{, } + // SQADD ., ., #{, } + // UQADD ., ., #{, } + // SUB ., ., #{, } + // SUBR ., ., #{, } + // SQSUB ., ., #{, } + // UQSUB ., ., #{, } + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + + FALLTHROUGH; // DUP ., #{, } // MOV ., #{, } case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) - { - emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd - ssize_t imm = emitGetInsSC(id); // iiiiiiii - if (id->idOptionalShift()) // h - { - // Special case for left-shifted zero - if (imm == 0) - { - printf("#0, LSL #8"); - break; - } - - imm <<= 8; - } - emitDispImm(imm, false); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImmOptsLSL(emitGetInsSC(id), id->idOptionalShift(), 8); // iiiiiiii, h break; - } // SMAX ., ., # // SMIN ., ., # @@ -22361,11 +22389,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d1c43a16789e7..4112aff6ac892 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -41,7 +41,7 @@ void emitDispInst(instruction ins); void emitDispImm(ssize_t imm, bool addComma, bool alwaysHex = false, bool isAddrOffset = false); void emitDispFloatZero(); void emitDispFloatImm(ssize_t imm8); -void emitDispImmOptsLSL12(ssize_t imm, insOpts opt); +void emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount); void emitDispCond(insCond cond); void emitDispFlags(insCflags flags); void emitDispBarrier(insBarrier barrier); From e17493e801cece6d1ad57745c680fbdfa9ae04f4 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 22 Jan 2024 12:05:01 -0500 Subject: [PATCH 4/6] Fix PerfScore --- src/coreclr/jit/emitarm64.cpp | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 363916c8a09cc..109b5ffb8be04 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -22390,6 +22390,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_DZ_1A: // ........xx...... .............DDD -- sve_int_pn_ptrue case IF_SVE_EA_1A: // ........xx...... ...iiiiiiiiddddd -- SVE broadcast floating-point immediate (unpredicated) + case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; @@ -22414,25 +22415,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_5C; break; - case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) - switch (ins) - { - // TODO-SVE: Why are these different? MOV is an alias for DUP - case INS_sve_mov: - result.insThroughput = PERFSCORE_THROUGHPUT_2C; - result.insLatency = PERFSCORE_LATENCY_2C; - break; - case INS_sve_dup: - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - result.insLatency = PERFSCORE_LATENCY_3C; - break; - default: - // all other instructions - perfScoreUnhandledInstruction(id, &result); - break; - } - break; - case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus // immediate) case IF_SVE_IH_3A_A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus From 953f0583ef63a8c36ac35e5d35fa8cddf14c1375 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 22 Jan 2024 13:48:03 -0500 Subject: [PATCH 5/6] Refactor instrDesc init --- src/coreclr/jit/emitarm64.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 109b5ffb8be04..7d578af3ff66a 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -6353,8 +6353,18 @@ void emitter::emitIns_R_I(instruction ins, assert(canEncode); assert(fmt != IF_NONE); - // Instructions with optional shifts need larger instrDesc to store state - instrDesc* id = optionalShift ? emitNewInstrCns(attr, imm) : emitNewInstrSC(attr, imm); + instrDesc* id; + + if (!optionalShift) + { + id = emitNewInstrSC(attr, imm); + } + else + { + // Instructions with optional shifts (MOV, DUP, etc.) need larger instrDesc to store state + id = emitNewInstrCns(attr, imm); + id->idOptionalShift(hasShift); + } id->idIns(ins); id->idInsFmt(fmt); @@ -6362,12 +6372,6 @@ void emitter::emitIns_R_I(instruction ins, id->idReg1(reg); - // For instructions with optional shifts (MOV, DUP, etc.) - if (optionalShift) - { - id->idOptionalShift(hasShift); - } - #ifdef DEBUG id->idDebugOnlyInfo()->idMemCookie = targetHandle; id->idDebugOnlyInfo()->idFlags = gtFlags; From b57d6cc3f02ae23cdae699294a10df98459463f2 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 23 Jan 2024 11:59:45 -0500 Subject: [PATCH 6/6] Create insEncodeImm8 helper --- src/coreclr/jit/emitarm64.cpp | 25 ++++++++++++++++++------- src/coreclr/jit/emitarm64.h | 3 +++ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7d578af3ff66a..c4ed042b5b666 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -14658,6 +14658,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 14; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm8_12_to_5(ssize_t imm) +{ + assert(isValidSimm8(imm) || isValidUimm8(imm)); + return (code_t)((imm & 0xFF) << 5); +} + /***************************************************************************** * * Returns the encoding to select the 4/8-byte width specifier @@ -16927,10 +16938,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_ED_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer min/max immediate (unpredicated) case IF_SVE_EE_1A: // ........xx...... ...iiiiiiiiddddd -- SVE integer multiply immediate (unpredicated) { + imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); // iiiiiiii - code |= (imm8 << 5); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeImm8_12_to_5(imm); // iiiiiiii code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx dst += emitOutput_Instr(dst, code); break; @@ -16939,11 +16950,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_EB_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE broadcast integer immediate (unpredicated) case IF_SVE_EC_1A: // ........xx...... ..hiiiiiiiiddddd -- SVE integer add/subtract immediate (unpredicated) { + imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); - code |= (id->idOptionalShift() ? 0x2000 : 0); // h - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code_t imm8 = (code_t)(emitGetInsSC(id) & 0xFF); // iiiiiiii - code |= (imm8 << 5); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeImm8_12_to_5(imm); // iiiiiiii + code |= (id->idOptionalShift() ? 0x2000 : 0); // h code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx dst += emitOutput_Instr(dst, code); break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 4112aff6ac892..84266e68e8953 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -526,6 +526,9 @@ static code_t insEncodeSimm5_20_to_16(ssize_t imm); // Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. static code_t insEncodeUimm7_20_to_14(ssize_t imm); +// Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. +static code_t insEncodeImm8_12_to_5(ssize_t imm); + // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. // This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm);