diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index ec01f356e194ed..488054dd552a87 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -1781,7 +1781,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
         // We furthermore allocate the "monitor acquired" bool between PSP and
         // the saved registers because this is part of the EnC header.
         // Note that OSR methods reuse the monitor bool created by tier 0.
-        saveRegsPlusPSPSize += compiler->lvaLclSize(compiler->lvaMonAcquired);
+        saveRegsPlusPSPSize += compiler->lvaLclStackHomeSize(compiler->lvaMonAcquired);
     }
 
     unsigned const saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
@@ -2966,7 +2966,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
     if (lclNode->IsMultiReg())
     {
         // This is the case of storing to a multi-reg HFA local from a fixed-size SIMD type.
-        assert(varTypeIsSIMD(data) && varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT));
+        assert(varTypeIsSIMD(data));
         regNumber    operandReg = genConsumeReg(data);
         unsigned int regCount   = varDsc->lvFieldCnt;
         for (unsigned i = 0; i < regCount; ++i)
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 8184b20068387e..82a4f35db1b042 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -792,7 +792,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
     if (treeNode->putInIncomingArgArea())
    {
         varNumOut    = getFirstArgWithStackSlot();
-        argOffsetMax = compiler->compArgSize;
+        argOffsetMax = compiler->lvaParameterStackSize;
 #if FEATURE_FASTTAILCALL
         // This must be a fast tail call.
         assert(treeNode->gtCall->IsFastTailCall());
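
Aside: this hunk (and several below) replaces `compiler->compArgSize` with `compiler->lvaParameterStackSize`. The relationship the patch relies on — the new field equals `compArgSize` minus the bytes accounted to register-passed parameters — can be sketched as a self-contained model (illustrative C++ only; `ParamInfoModel` and its members are stand-ins, not JIT types):

    #include <cassert>

    // Minimal model of the invariant asserted later in gcencode.cpp/morph.cpp:
    // lvaParameterStackSize == compArgSize - (register-passed arg bytes).
    struct ParamInfoModel
    {
        unsigned compArgSize;       // total parameter bytes, including register-passed ones on some ABIs
        unsigned calleeRegArgCount; // parameters passed in registers
        static const unsigned RegSize = 8;

        unsigned ParameterStackSize() const
        {
            assert(compArgSize >= calleeRegArgCount * RegSize);
            return compArgSize - calleeRegArgCount * RegSize;
        }
    };

    int main()
    {
        ParamInfoModel m{6 * ParamInfoModel::RegSize, 4};              // 6 params, 4 in registers
        assert(m.ParameterStackSize() == 2 * ParamInfoModel::RegSize); // only 2 live on the stack
        return 0;
    }
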
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index dbdd45d0eedc56..fec5249dba483a 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -3820,7 +3820,7 @@ void CodeGen::genCheckUseBlockInit()
                 {
                     // Var is on the stack at entry.
                     initStkLclCnt +=
-                        roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
+                        roundUp(compiler->lvaLclStackHomeSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
                     counted = true;
                 }
             }
@@ -3869,7 +3869,8 @@ void CodeGen::genCheckUseBlockInit()
 
                 if (!counted)
                 {
-                    initStkLclCnt += roundUp(compiler->lvaLclSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
+                    initStkLclCnt +=
+                        roundUp(compiler->lvaLclStackHomeSize(varNum), TARGET_POINTER_SIZE) / sizeof(int);
                     counted = true;
                 }
             }
@@ -4130,7 +4131,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
                 (varDsc->lvExactSize() >= TARGET_POINTER_SIZE))
             {
                 // We only initialize the GC variables in the TYP_STRUCT
-                const unsigned slots  = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
+                const unsigned slots  = (unsigned)compiler->lvaLclStackHomeSize(varNum) / REGSIZE_BYTES;
                 ClassLayout*   layout = varDsc->GetLayout();
 
                 for (unsigned i = 0; i < slots; i++)
@@ -4147,7 +4148,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
                 regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
 
                 // zero out the whole thing rounded up to a single stack slot size
-                unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int));
+                unsigned lclSize = roundUp(compiler->lvaLclStackHomeSize(varNum), (unsigned)sizeof(int));
                 unsigned i;
                 for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
                 {
@@ -4589,21 +4590,6 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed
     }
     else
     {
-        if (isFramePointerUsed())
-        {
-#if defined(TARGET_ARM)
-            // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
-            // will become enregistered.
-            // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
-            noway_assert((2 * REGSIZE_BYTES <= varDsc->GetStackOffset()) &&
-                         (size_t(varDsc->GetStackOffset()) < compiler->compArgSize + 2 * REGSIZE_BYTES));
-#else
-            // GetStackOffset() is always valid for incoming stack-arguments, even if the argument
-            // will become enregistered.
-            noway_assert((0 < varDsc->GetStackOffset()) && (size_t(varDsc->GetStackOffset()) < compiler->compArgSize));
-#endif
-        }
-
         // We will just use the initReg since it is an available register
         // and we are probably done using it anyway...
         reg             = initReg;
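
Aside: the slot-counting arithmetic in the two `genCheckUseBlockInit` hunks above is unchanged by the patch; only the size query is renamed. A minimal sketch of that arithmetic, assuming a 64-bit target (`TARGET_POINTER_SIZE == 8`) and the JIT's power-of-two `roundUp` semantics:

    #include <cassert>

    // Mirrors the JIT helper: round 'size' up to a multiple of 'align' (a power of two).
    unsigned roundUp(unsigned size, unsigned align)
    {
        return (size + align - 1) & ~(align - 1);
    }

    int main()
    {
        const unsigned TARGET_POINTER_SIZE = 8;
        // A 20-byte local occupies 24 bytes of frame, i.e. 6 int-sized slots to zero-init.
        assert(roundUp(20, TARGET_POINTER_SIZE) / sizeof(int) == 6);
        return 0;
    }
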
@@ -5212,7 +5198,7 @@ void CodeGen::genFnProlog()
         }
 
         signed int loOffs = varDsc->GetStackOffset();
-        signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclSize(varNum);
+        signed int hiOffs = varDsc->GetStackOffset() + compiler->lvaLclStackHomeSize(varNum);
 
         /* We need to know the offset range of tracked stack GC refs */
         /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
@@ -5669,7 +5655,7 @@ void CodeGen::genFnProlog()
     {
         // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
         unsigned filterEndOffsetSlotOffs =
-            compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
+            compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE;
 
         // Zero out the slot for nesting level 0
         unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE;
@@ -7880,12 +7866,13 @@ void CodeGen::genMultiRegStoreToLocal(GenTreeLclVar* lclNode)
         offset += genTypeSize(srcType);
 
 #ifdef DEBUG
+        unsigned stackHomeSize = compiler->lvaLclStackHomeSize(lclNum);
 #ifdef TARGET_64BIT
-        assert(offset <= varDsc->lvSize());
+        assert(offset <= stackHomeSize);
 #else  // !TARGET_64BIT
         if (varTypeIsStruct(varDsc))
         {
-            assert(offset <= varDsc->lvSize());
+            assert(offset <= stackHomeSize);
         }
         else
         {
@@ -8200,7 +8187,7 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
 
         assert(varDsc->lvOnFrame);
 
-        unsigned int size = compiler->lvaLclSize(varNum);
+        unsigned int size = compiler->lvaLclStackHomeSize(varNum);
         if ((size / TARGET_POINTER_SIZE) > 16)
         {
             // This will require more than 16 instructions, switch to rep stosd/memset call.
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index f484bb21ab1eb6..f51023c4305221 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1893,7 +1893,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
     // We can't write beyond the arg area unless this is a tail call, in which case we use
     // the first stack arg as the base of the incoming arg area.
 #ifdef DEBUG
-    unsigned areaSize = compiler->lvaLclSize(outArgVarNum);
+    unsigned areaSize = compiler->lvaLclStackHomeSize(outArgVarNum);
 #if FEATURE_FASTTAILCALL
     if (putArgStk->gtCall->IsFastTailCall())
     {
diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp
index ceb64e2fc355cb..926a74cc4ace46 100644
--- a/src/coreclr/jit/codegenloongarch64.cpp
+++ b/src/coreclr/jit/codegenloongarch64.cpp
@@ -4841,8 +4841,8 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
         assert(varDsc->lvType == TYP_STRUCT);
         assert(varDsc->lvOnFrame && !varDsc->lvRegister);
 
-        srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
-                                    // as that is how much stack is allocated for this LclVar
+        srcSize = compiler->lvaLclStackHomeSize(varNode->GetLclNum());
+
         layout = varDsc->GetLayout();
     }
     else // we must have a GT_BLK
@@ -4870,8 +4870,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
         if (varNode != nullptr)
         {
             // If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size.
-            const LclVarDsc* varDsc       = compiler->lvaGetDesc(varNode);
-            const unsigned   varStackSize = varDsc->lvSize();
+            const unsigned varStackSize = compiler->lvaLclStackHomeSize(varNode->GetLclNum());
             if (varStackSize >= srcSize)
             {
                 srcSize = varStackSize;
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 1e43cc1e84952e..e0ac3e5044366b 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -279,7 +279,8 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
     // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
     unsigned filterEndOffsetSlotOffs;
-    filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+    filterEndOffsetSlotOffs =
+        (unsigned)(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
 
     unsigned curNestingSlotOffs;
     curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
@@ -2210,10 +2211,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
             unsigned filterEndOffsetSlotOffs;
-            PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE); // below doesn't
-                                                                                                      // underflow.
+            assert(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE);
             filterEndOffsetSlotOffs =
-                (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+                (unsigned)(compiler->lvaLclStackHomeSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
 
             size_t curNestingSlotOffs;
             curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
@@ -10752,10 +10752,8 @@ void CodeGen::genFnEpilog(BasicBlock* block)
 
         if (fCalleePop)
         {
-            noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * REGSIZE_BYTES);
-            stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
-
-            noway_assert(compiler->compArgSize < 0x10000); // "ret" only has 2 byte operand
+            stkArgSize = compiler->lvaParameterStackSize;
+            noway_assert(stkArgSize < 0x10000); // "ret" only has 2 byte operand
         }
 
 #ifdef UNIX_X86_ABI
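
Aside: the `genCallFinally`/`genCodeForTreeNode` hunks above keep the shadow-SP-slot offset arithmetic intact; only the size query changes. A sketch of that layout math, assuming a 32-bit target and a hypothetical slot area sized for three nesting levels plus the reserved `FixContext` slot at the end:

    #include <cassert>

    int main()
    {
        const unsigned TARGET_POINTER_SIZE = 4;
        const unsigned slotAreaSize        = 4 * TARGET_POINTER_SIZE; // 3 nesting slots + 1 reserved (assumed)

        // The last slot is reserved for ICodeManager::FixContext(ppEndRegion).
        unsigned filterEndOffsetSlotOffs = slotAreaSize - TARGET_POINTER_SIZE;

        // The slot for nesting level 0 sits just below the reserved slot.
        unsigned finallyNesting     = 0;
        unsigned curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
        assert(curNestingSlotOffs == 2 * TARGET_POINTER_SIZE);
        return 0;
    }
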
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 3fd967b9552898..e7b60869d6fb07 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -625,7 +625,7 @@ class LclVarDsc
 #endif // FEATURE_HFA_FIELDS_PRESENT
 
 #ifdef DEBUG
-    // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct
+    // TODO-Cleanup: this flag is only in use by asserts that are checking for struct
     // types, and is needed because of cases where TYP_STRUCT is bashed to an integral type.
     // Consider cleaning this up so this workaround is not required.
     unsigned char lvUnusedStruct : 1; // All references to this promoted struct are through its field locals.
@@ -1118,9 +1118,6 @@ class LclVarDsc
     }
 
     unsigned lvExactSize() const;
-    unsigned lvSize() const;
-
-    size_t lvArgStackSize() const;
 
     unsigned lvSlotNum; // original slot # (if remapped)
 
@@ -4312,7 +4309,7 @@ class Compiler
         return varNum;
     }
 
-    unsigned lvaLclSize(unsigned varNum);
+    unsigned lvaLclStackHomeSize(unsigned varNum);
     unsigned lvaLclExactSize(unsigned varNum);
 
     bool lvaHaveManyLocals(float percent = 1.0f) const;
@@ -4437,6 +4434,7 @@ class Compiler
         bool ShouldPromoteStructVar(unsigned lclNum);
         void PromoteStructVar(unsigned lclNum);
         void SortStructFields();
+        bool IsArmHfaParameter(unsigned lclNum);
 
         var_types TryPromoteValueClassAsPrimitive(CORINFO_TYPE_LAYOUT_NODE* treeNodes, size_t maxTreeNodes, size_t index);
         void AdvanceSubTree(CORINFO_TYPE_LAYOUT_NODE* treeNodes, size_t maxTreeNodes, size_t* index);
@@ -4457,27 +4455,26 @@ class Compiler
     bool lvaIsGCTracked(const LclVarDsc* varDsc);
 
 #if defined(FEATURE_SIMD)
-    bool lvaMapSimd12ToSimd16(const LclVarDsc* varDsc)
+    bool lvaMapSimd12ToSimd16(unsigned varNum)
     {
-        assert(varDsc->lvType == TYP_SIMD12);
+        LclVarDsc* varDsc = lvaGetDesc(varNum);
+        assert(varDsc->TypeGet() == TYP_SIMD12);
 
-#if defined(TARGET_64BIT)
-        assert(compAppleArm64Abi() || varDsc->lvSize() == 16);
-#endif // defined(TARGET_64BIT)
+        unsigned stackHomeSize = lvaLclStackHomeSize(varNum);
 
         // We make local variable SIMD12 types 16 bytes instead of just 12.
-        // lvSize() will return 16 bytes for SIMD12, even for fields.
+        // lvaLclStackHomeSize() will return 16 bytes for SIMD12, even for fields.
         // However, we can't do that mapping if the var is a dependently promoted struct field.
         // Such a field must remain its exact size within its parent struct unless it is a single
         // field *and* it is the only field in a struct of 16 bytes.
-        if (varDsc->lvSize() != 16)
+        if (stackHomeSize != 16)
         {
             return false;
         }
 
         if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
         {
             LclVarDsc* parentVarDsc = lvaGetDesc(varDsc->lvParentLcl);
-            return (parentVarDsc->lvFieldCnt == 1) && (parentVarDsc->lvSize() == 16);
+            return (parentVarDsc->lvFieldCnt == 1) && (lvaLclStackHomeSize(varDsc->lvParentLcl) == 16);
         }
 
         return true;
     }
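
Aside: the rewritten `lvaMapSimd12ToSimd16` above keys entirely off the 16-byte stack home. A self-contained model of that decision (the `LocalModel` type is a stand-in for `LclVarDsc`, not a JIT definition):

    #include <cassert>

    struct LocalModel
    {
        unsigned stackHomeSize;       // what lvaLclStackHomeSize would report
        bool     isDependentField;    // dependently promoted struct field?
        unsigned parentFieldCnt;
        unsigned parentStackHomeSize;
    };

    // A TYP_SIMD12 local may be widened to 16 bytes only when its stack home is
    // 16 bytes; a dependent field only when it is the sole field of a 16-byte parent.
    bool MapSimd12ToSimd16(const LocalModel& lcl)
    {
        if (lcl.stackHomeSize != 16)
        {
            return false;
        }
        if (lcl.isDependentField)
        {
            return (lcl.parentFieldCnt == 1) && (lcl.parentStackHomeSize == 16);
        }
        return true;
    }

    int main()
    {
        assert(MapSimd12ToSimd16({16, false, 0, 0}));  // plain SIMD12 local: widen
        assert(!MapSimd12ToSimd16({12, false, 0, 0})); // exact-size home (e.g. an arg): keep 12
        assert(!MapSimd12ToSimd16({16, true, 2, 32})); // field packed inside a larger parent
        return 0;
    }
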
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index bdf77b1f404fe5..7428137448c4a8 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -4085,6 +4085,17 @@ inline bool Compiler::impIsPrimitive(CorInfoType jitType)
 
 inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(const LclVarDsc* varDsc)
 {
+    // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
+    // where the struct itself is no longer used because all access is via its member fields.
+    // When that happens, the struct is marked as unused and its type has been changed to
+    // TYP_INT (to keep the GC tracking code from looking at it).
+    // See Compiler::raAssignVars() for details. For example:
+    //      N002 (  4,  3) [00EA067C] -------------               return    struct $346
+    //      N001 (  3,  2) [00EA0628] -------------                  lclVar    struct(U) V03 loc2
+    //                                                                        float  V03.f1 (offs=0x00) -> V12 tmp7
+    //                                                                        f8 (last use) (last use) $345
+    // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03
+    // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
     assert(!varDsc->lvPromoted || varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct);
 
     if (!varDsc->lvPromoted)
@@ -4322,19 +4333,20 @@ bool Compiler::fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool
             return false;
         }
 
-// Below conditions guarantee block initialization, which will initialize
-// all struct fields. If the logic for block initialization in CodeGen::genCheckUseBlockInit()
-// changes, these conditions need to be updated.
+        // Below conditions guarantee block initialization, which will initialize
+        // all struct fields. If the logic for block initialization in CodeGen::genCheckUseBlockInit()
+        // changes, these conditions need to be updated.
+        unsigned stackHomeSize = lvaLclStackHomeSize(varNum);
 #ifdef TARGET_64BIT
 #if defined(TARGET_AMD64)
         // We can clear using aligned SIMD so the threshold is lower,
         // and clears in order which is better for auto-prefetching
-        if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 4)
+        if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 4)
 #else // !defined(TARGET_AMD64)
-        if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 8)
+        if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 8)
 #endif
 #else
-        if (roundUp(varDsc->lvSize(), TARGET_POINTER_SIZE) / sizeof(int) > 4)
+        if (roundUp(stackHomeSize, TARGET_POINTER_SIZE) / sizeof(int) > 4)
 #endif
         {
             return false;
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp
index 12917dbc1b9b34..54a78cc7ae8966 100644
--- a/src/coreclr/jit/gcencode.cpp
+++ b/src/coreclr/jit/gcencode.cpp
@@ -1588,10 +1588,9 @@ size_t GCInfo::gcInfoBlockHdrSave(
         assert(header->revPInvokeOffset != INVALID_REV_PINVOKE_OFFSET);
     }
 
-    assert((compiler->compArgSize & 0x3) == 0);
-
-    size_t argCount =
-        (compiler->compArgSize - (compiler->codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
+    assert(compiler->lvaParameterStackSize ==
+           (compiler->compArgSize - compiler->codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES));
+    size_t argCount = compiler->lvaParameterStackSize / REGSIZE_BYTES;
 
     assert(argCount <= MAX_USHORT_SIZE_T);
     header->argCount = static_cast<unsigned short>(argCount);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 95087e29be2304..36991b961bba1b 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -27052,9 +27052,9 @@ GenTree* Compiler::gtNewSimdWithElementNode(
 //
 GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount)
 {
-    LclVarDsc* opVarDsc  = lvaGetDesc(op->AsLclVar());
-    unsigned   lclNum    = lvaGetLclNum(opVarDsc);
-    unsigned   fieldSize = opVarDsc->lvSize() / fieldCount;
+    unsigned   lclNum    = op->AsLclVar()->GetLclNum();
+    LclVarDsc* opVarDsc  = lvaGetDesc(lclNum);
+    unsigned   fieldSize = opVarDsc->lvExactSize() / fieldCount;
     var_types  fieldType = Compiler::getSIMDTypeForSize(fieldSize);
 
     GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
@@ -27083,9 +27083,9 @@ GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fi
 //
 GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd)
 {
-    LclVarDsc* opVarDsc  = lvaGetDesc(op->AsLclVar());
-    unsigned   lclNum    = lvaGetLclNum(opVarDsc);
-    unsigned   fieldSize = opVarDsc->lvSize() / fieldCount;
+    unsigned   lclNum    = op->AsLclVar()->GetLclNum();
+    LclVarDsc* opVarDsc  = lvaGetDesc(lclNum);
+    unsigned   fieldSize = opVarDsc->lvExactSize() / fieldCount;
 
     GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
     int               offset    = 0;
     unsigned          sizeBytes = 0;
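
Aside: `gtConvertTableOpToFieldList`/`gtConvertParamOpToFieldList` now divide `lvExactSize()` rather than the rounded `lvSize()`, so the per-field size comes from the operand's exact layout. A tiny sketch of that arithmetic (the switch stands in for `getSIMDTypeForSize`; the sizes are illustrative assumptions):

    #include <cassert>

    // Map a per-field byte size to a SIMD width, as getSIMDTypeForSize would.
    unsigned simdWidthForSize(unsigned bytes)
    {
        switch (bytes)
        {
            case 8:  return 8;  // TYP_SIMD8
            case 16: return 16; // TYP_SIMD16
            default: assert(!"unexpected SIMD size"); return 0;
        }
    }

    int main()
    {
        unsigned exactSize  = 64; // e.g. a 64-byte table operand (assumed)
        unsigned fieldCount = 4;
        assert(simdWidthForSize(exactSize / fieldCount) == 16);
        return 0;
    }
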
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 17f4f0dec4edfd..42acc18f69a3d5 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -6757,7 +6757,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
             VAR_ST_VALID:
 
                 /* if it is a struct store, make certain we don't overflow the buffer */
-                assert(lclTyp != TYP_STRUCT || lvaLclSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd));
+                assert(lclTyp != TYP_STRUCT || lvaLclStackHomeSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd));
 
                 if (lvaTable[lclNum].lvNormalizeOnLoad())
                 {
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 977b7404bb7159..1dff57be8016e1 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -444,7 +444,7 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
        instruction can only pop 2^16 arguments. Could be handled correctly
        but it will be very difficult for fully interruptible code */
 
-    if (compArgSize != (size_t)(unsigned short)compArgSize)
+    if (lvaParameterStackSize != (size_t)(unsigned short)lvaParameterStackSize)
         IMPL_LIMITATION("Too many arguments for the \"ret\" instruction to pop");
 #endif
 }
@@ -739,7 +739,6 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo, unsigned skipArgs, un
             switch (origArgType)
             {
                 case TYP_STRUCT:
-                    assert(varDsc->lvSize() == argSize);
                     cAlign = varDsc->lvStructDoubleAlign ? 2 : 1;
 
                     // HFA arguments go on the stack frame. They don't get spilled in the prolog like struct
@@ -2556,7 +2555,7 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
             // promotion of non FP or SIMD type fields is disallowed.
             // TODO-1stClassStructs: add support in Lowering and prolog generation
             // to enable promoting these types.
-            if (varDsc->lvIsParam && (varDsc->lvIsHfa() != varTypeUsesFloatReg(fieldType)))
+            if (varDsc->lvIsParam && (IsArmHfaParameter(lclNum) != varTypeUsesFloatReg(fieldType)))
             {
                 canPromote = false;
             }
@@ -2613,6 +2612,28 @@ bool Compiler::StructPromotionHelper::CanPromoteStructVar(unsigned lclNum)
     return canPromote;
 }
 
+//--------------------------------------------------------------------------------------------
+// IsArmHfaParameter - Check if a local is an ARM or ARM64 HFA parameter.
+// This is a quirk to match old promotion behavior.
+//
+// Arguments:
+//   lclNum - The local
+//
+// Return value:
+//   True if it is an HFA parameter.
+//
+bool Compiler::StructPromotionHelper::IsArmHfaParameter(unsigned lclNum)
+{
+    if (!GlobalJitOptions::compFeatureHfa)
+    {
+        return false;
+    }
+
+    CorInfoHFAElemType hfaType =
+        compiler->info.compCompHnd->getHFAType(compiler->lvaGetDesc(lclNum)->GetLayout()->GetClassHandle());
+    return hfaType != CORINFO_HFA_ELEM_NONE;
+}
+
 //--------------------------------------------------------------------------------------------
 // ShouldPromoteStructVar - Should a struct var be promoted if it can be promoted?
 //                          This routine mainly performs profitability checks.  Right now it also has
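
Aside: `IsArmHfaParameter` asks the VM via `getHFAType`, but the property it detects can be modeled locally: an HFA (homogeneous floating-point aggregate) on arm32/arm64 is, roughly, a struct of one to four fields of the same floating-point type. A rough sketch, not the real check (which also handles vectors and nested aggregates):

    #include <cassert>

    struct FieldModel
    {
        bool     isFloating;
        unsigned size;
    };

    // Illustrative only: one to four fields, all the same floating-point type.
    bool IsHfaLike(const FieldModel* fields, unsigned count)
    {
        if (count == 0 || count > 4)
        {
            return false;
        }
        for (unsigned i = 0; i < count; i++)
        {
            if (!fields[i].isFloating || (fields[i].size != fields[0].size))
            {
                return false;
            }
        }
        return true;
    }

    int main()
    {
        FieldModel vec3[]  = {{true, 4}, {true, 4}, {true, 4}}; // struct { float x, y, z; }
        FieldModel mixed[] = {{true, 4}, {false, 4}};           // struct { float x; int y; }
        assert(IsHfaLike(vec3, 3));
        assert(!IsHfaLike(mixed, 2));
        return 0;
    }
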
@@ -2667,7 +2688,7 @@ bool Compiler::StructPromotionHelper::ShouldPromoteStructVar(unsigned lclNum)
         shouldPromote = false;
     }
 #endif // TARGET_LOONGARCH64 || TARGET_RISCV64
-    else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !varDsc->lvIsHfa())
+    else if (varDsc->lvIsParam && !compiler->lvaIsImplicitByRefLocal(lclNum) && !IsArmHfaParameter(lclNum))
     {
 #if FEATURE_MULTIREG_STRUCT_PROMOTE
         // Is this a variable holding a value with exactly two fields passed in
@@ -2913,7 +2934,8 @@ void Compiler::StructPromotionHelper::PromoteStructVar(unsigned lclNum)
             if (varTypeIsValidHfaType(hfaType))
             {
                 fieldVarDsc->SetHfaType(hfaType);
-                fieldVarDsc->lvIsMultiRegArg = (varDsc->lvIsMultiRegArg != 0) && (fieldVarDsc->lvHfaSlots() > 1);
+                fieldVarDsc->lvIsMultiRegArg =
+                    (varDsc->lvIsMultiRegArg != 0) && (fieldVarDsc->lvExactSize() > genTypeSize(hfaType));
             }
         }
     }
@@ -3710,7 +3732,7 @@ void Compiler::lvaUpdateClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HAND
 }
 
 //------------------------------------------------------------------------
-// lvaLclSize: returns size of a local variable, in bytes
+// lvaLclStackHomeSize: returns size of stack home of a local variable, in bytes
 //
 // Arguments:
 //    varNum -- variable to query
@@ -3718,26 +3740,56 @@ void Compiler::lvaUpdateClass(unsigned varNum, GenTree* tree, CORINFO_CLASS_HAND
 // Returns:
 //    Number of bytes needed on the frame for such a local.
 //
-unsigned Compiler::lvaLclSize(unsigned varNum)
+unsigned Compiler::lvaLclStackHomeSize(unsigned varNum)
 {
     assert(varNum < lvaCount);
 
-    var_types varType = lvaTable[varNum].TypeGet();
+    LclVarDsc* varDsc  = lvaGetDesc(varNum);
+    var_types  varType = varDsc->TypeGet();
 
-    if (varType == TYP_STRUCT)
+    if (!varTypeIsStruct(varType))
     {
-        return lvaTable[varNum].lvSize();
+#ifdef TARGET_64BIT
+        // We only need this Quirk for TARGET_64BIT
+        if (varDsc->lvQuirkToLong)
+        {
+            noway_assert(varDsc->IsAddressExposed());
+            return genTypeStSz(TYP_LONG) * sizeof(int); // return 8  (2 * 4)
+        }
+#endif
+
+        return genTypeStSz(varType) * sizeof(int);
     }
 
-#ifdef TARGET_64BIT
-    // We only need this Quirk for TARGET_64BIT
-    if (lvaTable[varNum].lvQuirkToLong)
+    if (varDsc->lvIsParam && !varDsc->lvIsStructField)
     {
-        noway_assert(lvaTable[varNum].IsAddressExposed());
-        return genTypeStSz(TYP_LONG) * sizeof(int); // return 8  (2 * 4)
+        // If this parameter was passed on the stack then we often reuse that
+        // space for its home. Take into account that this space might actually
+        // not be pointer-sized for some cases (macos-arm64 ABI currently).
+        const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(varNum);
+        if (abiInfo.HasExactlyOneStackSegment())
+        {
+            return abiInfo.Segment(0).GetStackSize();
+        }
+
+        // There are other cases where the caller has allocated space for the
+        // parameter, like windows-x64 with shadow space for register
+        // parameters, but in those cases this rounding is fine.
+        return roundUp(varDsc->lvExactSize(), TARGET_POINTER_SIZE);
    }
-#endif
 
-    return genTypeStSz(varType) * sizeof(int);
+#if defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
+    // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
+    // this for arguments, which must be passed according the defined ABI. We don't want to do this for
+    // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16().
+    // (Note that for 64-bits, we are already rounding up to 16.)
+    if (varDsc->TypeGet() == TYP_SIMD12)
+    {
+        return 16;
+    }
+#endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
+
+    return roundUp(varDsc->lvExactSize(), TARGET_POINTER_SIZE);
 }
 
 //
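
Aside: a condensed model of the new `lvaLclStackHomeSize` logic above, assuming a 64-bit target; all names here are local stand-ins. It illustrates the three cases: primitives occupy whole int-sized slots, stack-passed parameters reuse the exact caller-allocated ABI segment (which may not be pointer-sized on macos-arm64), and other structs round up to pointer size:

    #include <cassert>

    unsigned roundUpTo(unsigned size, unsigned align)
    {
        return (size + align - 1) & ~(align - 1);
    }

    unsigned StackHomeSizeModel(bool isStruct, bool isStackParam, unsigned abiStackSize, unsigned exactSize)
    {
        const unsigned pointerSize = 8;
        if (!isStruct)
        {
            return roundUpTo(exactSize, 4); // primitives occupy whole int-sized stack slots
        }
        if (isStackParam)
        {
            return abiStackSize; // reuse the caller-allocated space; may not be pointer-sized
        }
        return roundUpTo(exactSize, pointerSize);
    }

    int main()
    {
        assert(StackHomeSizeModel(true, false, 0, 20) == 24); // 20-byte struct local -> 24-byte home
        assert(StackHomeSizeModel(true, true, 12, 12) == 12); // macos-arm64-style 12-byte stack arg
        return 0;
    }
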
@@ -4205,93 +4257,6 @@ unsigned LclVarDsc::lvExactSize() const
     return (lvType == TYP_STRUCT) ? GetLayout()->GetSize() : genTypeSize(lvType);
 }
 
-//------------------------------------------------------------------------
-// lvSize: Get the size of a struct local on the stack frame.
-//
-// Return Value:
-//    Size in bytes.
-//
-unsigned LclVarDsc::lvSize() const // Size needed for storage representation. Only used for structs.
-{
-    // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
-    // where the struct itself is no longer used because all access is via its member fields.
-    // When that happens, the struct is marked as unused and its type has been changed to
-    // TYP_INT (to keep the GC tracking code from looking at it).
-    // See Compiler::raAssignVars() for details. For example:
-    //      N002 (  4,  3) [00EA067C] -------------               return    struct $346
-    //      N001 (  3,  2) [00EA0628] -------------                  lclVar    struct(U) V03 loc2
-    //                                                                        float  V03.f1 (offs=0x00) -> V12 tmp7
-    //                                                                        f8 (last use) (last use) $345
-    // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03
-    // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
-
-    assert(varTypeIsStruct(lvType) || (lvPromoted && lvUnusedStruct));
-
-    if (lvIsParam)
-    {
-        assert(varTypeIsStruct(lvType));
-        const bool     isFloatHfa       = (lvIsHfa() && (GetHfaType() == TYP_FLOAT));
-        const unsigned argSizeAlignment = Compiler::eeGetArgSizeAlignment(lvType, isFloatHfa);
-        return roundUp(lvExactSize(), argSizeAlignment);
-    }
-
-#if defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
-    // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
-    // this for arguments, which must be passed according the defined ABI. We don't want to do this for
-    // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16().
-    // (Note that for 64-bits, we are already rounding up to 16.)
-    if (lvType == TYP_SIMD12)
-    {
-        assert(!lvIsParam);
-        return 16;
-    }
-#endif // defined(FEATURE_SIMD) && !defined(TARGET_64BIT)
-
-    return roundUp(lvExactSize(), TARGET_POINTER_SIZE);
-}
-
-/**********************************************************************************
- * Get stack size of the varDsc.
- */
-size_t LclVarDsc::lvArgStackSize() const
-{
-    // Make sure this will have a stack size
-    assert(!this->lvIsRegArg);
-
-    size_t stackSize = 0;
-    if (varTypeIsStruct(this))
-    {
-#if defined(WINDOWS_AMD64_ABI)
-        // Structs are either passed by reference or can be passed by value using one pointer
-        stackSize = TARGET_POINTER_SIZE;
-#elif defined(TARGET_ARM64) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        // lvSize performs a roundup.
-        stackSize = this->lvSize();
-
-#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-        if ((stackSize > TARGET_POINTER_SIZE * 2) && (!this->lvIsHfa()))
-        {
-            // If the size is greater than 16 bytes then it will
-            // be passed by reference.
-            stackSize = TARGET_POINTER_SIZE;
-        }
-#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
-
-#else // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI !TARGET_LOONGARCH64 !TARGET_RISCV64
-
-        NYI("Unsupported target.");
-        unreached();
-
-#endif // !TARGET_ARM64 !WINDOWS_AMD64_ABI !UNIX_AMD64_ABI
-    }
-    else
-    {
-        stackSize = TARGET_POINTER_SIZE;
-    }
-
-    return stackSize;
-}
-
 //------------------------------------------------------------------------
 // GetRegisterType: Determine register type for this local var.
 //
@@ -5634,7 +5599,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
         varDsc = lvaGetDesc(lvaPSPSym);
         assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
         assert(!varDsc->lvMustInit);         // It is never "must init".
-        varDsc->SetStackOffset(codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar));
+        varDsc->SetStackOffset(codeGen->genCallerSPtoInitialSPdelta() + lvaLclStackHomeSize(lvaOutgoingArgSpaceVar));
 
         if (opts.IsOSR())
         {
@@ -5717,7 +5682,7 @@ void Compiler::lvaFixVirtualFrameOffsets()
                 delta += TARGET_POINTER_SIZE;
             }
 
-            delta += lvaLclSize(lvaMonAcquired);
+            delta += lvaLclStackHomeSize(lvaMonAcquired);
         }
         else if (lvaPSPSym != BAD_VAR_NUM)
         {
@@ -6284,7 +6249,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
             // This var must go first, in what is called the 'frame header' for EnC so that it is
             // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame
             // layout requirements for EnC to work.
-            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs);
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclStackHomeSize(lvaMonAcquired), stkOffs);
         }
     }
 
@@ -6409,7 +6374,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
                 stkOffs -= TARGET_POINTER_SIZE;
             }
         }
-        stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs);
+        stkOffs =
+            lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclStackHomeSize(lvaShadowSPslotsVar), stkOffs);
     }
 #endif // FEATURE_EH_WINDOWS_X86
 
@@ -6419,7 +6385,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 
         if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
        {
-            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclStackHomeSize(lvaGSSecurityCookie),
+                                                       stkOffs);
         }
     }
 
@@ -6746,7 +6713,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
             }
 
             // Reserve the stack space for this variable
-            stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs);
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclStackHomeSize(lclNum), stkOffs);
#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
             // If we have an incoming register argument that has a promoted field then we
             // need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar
@@ -6769,7 +6736,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
         if (!opts.IsOSR() || !info.compPatchpointInfo->HasSecurityCookie())
         {
             // LOCALLOC used, but we have no unsafe buffer.  Allocated cookie last, close to localloc buffer.
-            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+            stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclStackHomeSize(lvaGSSecurityCookie),
+                                                       stkOffs);
         }
     }
 
@@ -6859,7 +6827,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
 
     // Since this will always use an SP relative offset of zero
     // at the end of lvaFixVirtualFrameOffsets, it will be set to absolute '0'
-    stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclSize(lvaOutgoingArgSpaceVar), stkOffs);
+    stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclStackHomeSize(lvaOutgoingArgSpaceVar),
+                                               stkOffs);
 }
 #endif // FEATURE_FIXED_OUT_ARGS
 
@@ -7486,7 +7455,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
     printf(" %7s ", varTypeName(type));
     if (genTypeSize(type) == 0)
     {
-        printf("(%2d) ", lvaLclSize(lclNum));
+        printf("(%2d) ", lvaLclStackHomeSize(lclNum));
     }
     else
     {
@@ -8199,7 +8168,7 @@ Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTree** pTree, fgWalkData*
         if (varType != TYP_STRUCT)
         {
             // Change the variable to a custom layout struct
-            unsigned size = roundUp(padding + pComp->lvaLclSize(lclNum), TARGET_POINTER_SIZE);
+            unsigned size = roundUp(padding + pComp->lvaLclStackHomeSize(lclNum), TARGET_POINTER_SIZE);
             ClassLayoutBuilder builder(pComp, size);
 #ifdef DEBUG
             builder.SetName(pComp->printfAlloc("%s_%u_Stress", varTypeName(varType), size),
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 56882a27d32b30..ade9b3b0ed9fa2 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -3027,7 +3027,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
 
             unsigned int overwrittenStart = put->getArgOffset();
             unsigned int overwrittenEnd   = overwrittenStart + put->GetStackByteSize();
-            int          baseOff          = -1; // Stack offset of first arg on stack
 
             for (unsigned callerArgLclNum = 0; callerArgLclNum < comp->info.compArgsCount; callerArgLclNum++)
             {
@@ -3038,34 +3037,12 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
                     continue;
                 }
 
-                unsigned int argStart;
-                unsigned int argEnd;
-#if defined(TARGET_AMD64)
-                if (TargetOS::IsWindows)
-                {
-                    // On Windows x64, the argument position determines the stack slot uniquely, and even the
-                    // register args take up space in the stack frame (shadow space).
-                    argStart = callerArgLclNum * TARGET_POINTER_SIZE;
-                    argEnd   = argStart + static_cast<unsigned int>(callerArgDsc->lvArgStackSize());
-                }
-                else
-#endif // TARGET_AMD64
-                {
-                    assert(callerArgDsc->GetStackOffset() != BAD_STK_OFFS);
-
-                    if (baseOff == -1)
-                    {
-                        baseOff = callerArgDsc->GetStackOffset();
-                    }
-
-                    // On all ABIs where we fast tail call the stack args should come in order.
-                    assert(baseOff <= callerArgDsc->GetStackOffset());
-
-                    // Compute offset of this stack argument relative to the first stack arg.
-                    // This will be its offset into the incoming arg space area.
-                    argStart = static_cast<unsigned int>(callerArgDsc->GetStackOffset() - baseOff);
-                    argEnd   = argStart + comp->lvaLclSize(callerArgLclNum);
-                }
+                const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(callerArgLclNum);
+                assert(abiInfo.HasExactlyOneStackSegment());
+                const ABIPassingSegment& seg = abiInfo.Segment(0);
+
+                unsigned argStart = seg.GetStackOffset();
+                unsigned argEnd   = argStart + seg.GetStackSize();
 
                 // If ranges do not overlap then this PUTARG_STK will not mess up the arg.
                 if ((overwrittenEnd <= argStart) || (overwrittenStart >= argEnd))
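
Aside: with the ABI segment in hand, the loop above reduces to a plain half-open interval intersection test: a PUTARG_STK write [start, end) disturbs a caller argument's stack segment [argStart, argEnd) only if the two ranges intersect. A minimal sketch:

    #include <cassert>

    bool Overlaps(unsigned start1, unsigned end1, unsigned start2, unsigned end2)
    {
        return (end1 > start2) && (start1 < end2);
    }

    int main()
    {
        // Write of 8 bytes at offset 8; caller arg occupies [0, 8): no clash.
        assert(!Overlaps(8, 16, 0, 8));
        // Write of 16 bytes at offset 0; caller arg occupies [8, 16): clash.
        assert(Overlaps(0, 16, 8, 16));
        return 0;
    }
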
@@ -7752,9 +7729,7 @@ void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
         // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear
         // it either.
 
-        LclVarDsc* varDsc = comp->lvaGetDesc(node->AsLclVarCommon());
-
-        if (comp->lvaMapSimd12ToSimd16(varDsc))
+        if (comp->lvaMapSimd12ToSimd16(node->AsLclVarCommon()->GetLclNum()))
         {
             JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
             DISPNODE(node);
@@ -8304,7 +8279,8 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node)
 #if defined(FEATURE_SIMD) && defined(TARGET_64BIT)
             if (node->TypeIs(TYP_SIMD12))
             {
-                assert(compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc) || (varDsc->lvSize() == 12));
+                assert(compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc) ||
+                       (compiler->lvaLclStackHomeSize(node->AsLclVar()->GetLclNum()) == 12));
             }
 #endif // FEATURE_SIMD && TARGET_64BIT
             if (varDsc->lvPromoted)
@@ -8800,7 +8776,7 @@ void Lowering::ContainCheckRet(GenTreeUnOp* ret)
         {
             const LclVarDsc* varDsc = comp->lvaGetDesc(op1->AsLclVarCommon());
             // This must be a multi-reg return or an HFA of a single element.
-            assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType)));
+            assert(varDsc->lvIsMultiRegRet);
 
             // Mark var as contained if not enregisterable.
             if (!varDsc->IsEnregisterableLcl())
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index 909785fe496a12..bf0d4506ea383b 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -8387,7 +8387,7 @@ void LinearScan::insertMove(
     var_types typ = varDsc->TypeGet();
 
 #if defined(FEATURE_SIMD)
-    if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(varDsc))
+    if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(lclNum))
     {
         typ = TYP_SIMD16;
     }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index 9544bac68f3109..518ab65dcf7dd5 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -5864,9 +5864,9 @@ void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call)
         call->gtArgs.Remove(thisArg);
     }
 
-    unsigned nOldStkArgsWords =
-        (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
-    GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
+    assert(lvaParameterStackSize == (compArgSize - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES));
+    unsigned nOldStkArgsWords = lvaParameterStackSize / REGSIZE_BYTES;
+    GenTree* arg3Node         = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
     CallArg* arg3 =
         call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg));
     // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
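
Aside: the morph.cpp hunk keeps the tail-call helper's word count but sources it from `lvaParameterStackSize`, relying on the equality documented by the new assert. A sketch of the x86 arithmetic under that assumption (the register count and argument shapes are illustrative, not taken from the patch):

    #include <cassert>

    int main()
    {
        const unsigned REGSIZE_BYTES   = 4;                 // x86
        unsigned compArgSize           = 5 * REGSIZE_BYTES; // five int-sized parameters (assumed)
        unsigned calleeRegArgCount     = 2;                 // e.g. two register-passed parameters (assumed)
        unsigned lvaParameterStackSize = compArgSize - calleeRegArgCount * REGSIZE_BYTES;

        // The helper receives the old stack-arg area measured in pointer-sized words.
        unsigned nOldStkArgsWords = lvaParameterStackSize / REGSIZE_BYTES;
        assert(nOldStkArgsWords == 3);
        return 0;
    }
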
diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp
index 8de3609aa85e75..5e298345fc4505 100644
--- a/src/coreclr/jit/optcse.cpp
+++ b/src/coreclr/jit/optcse.cpp
@@ -3933,7 +3933,7 @@ void CSE_Heuristic::Initialize()
 
             if (onStack)
             {
-                frameSize += m_pCompiler->lvaLclSize(lclNum);
+                frameSize += m_pCompiler->lvaLclStackHomeSize(lclNum);
             }
             else
             {
diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp
index 1de4dd8bd66992..a2e3c28005aedb 100644
--- a/src/coreclr/jit/regalloc.cpp
+++ b/src/coreclr/jit/regalloc.cpp
@@ -256,9 +256,9 @@ void Compiler::raMarkStkVars()
         noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
 
 #if FEATURE_FIXED_OUT_ARGS
-        noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
+        noway_assert((lclNum == lvaOutgoingArgSpaceVar) || (lvaLclStackHomeSize(lclNum) != 0));
 #else  // FEATURE_FIXED_OUT_ARGS
-        noway_assert(lvaLclSize(lclNum) != 0);
+        noway_assert(lvaLclStackHomeSize(lclNum) != 0);
 #endif // FEATURE_FIXED_OUT_ARGS
 
         varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index 9589527c5cac65..06444fb31cf956 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -599,7 +599,7 @@ var_types RegSet::tmpNormalizeType(var_types type)
     // We always spill SIMD12 to a 16-byte SIMD16 temp.
     // This is because we don't have a single instruction to store 12 bytes, so we want
     // to ensure that we always have the full 16 bytes for loading & storing the value.
-    // We also allocate non-argument locals as 16 bytes; see lvSize().
+    // We also allocate non-argument locals as 16 bytes; see lvaLclStackHomeSize().
     if (type == TYP_SIMD12)
     {
         type = TYP_SIMD16;
diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp
index 160bafd4787e8d..83482f13919dd2 100644
--- a/src/coreclr/jit/scopeinfo.cpp
+++ b/src/coreclr/jit/scopeinfo.cpp
@@ -1964,7 +1964,7 @@ void CodeGen::genSetScopeInfo(unsigned which,
         noway_assert(cookieOffset < varOffset);
         unsigned offset = varOffset - cookieOffset;
 
-        unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * REGSIZE_BYTES;
+        unsigned stkArgSize = compiler->lvaParameterStackSize;
         noway_assert(offset < stkArgSize);
         offset = stkArgSize - offset;