Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISC-V][JIT] Fix DevDiv_718151 test #88404

Merged
merged 3 commits into from
Jul 18, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 49 additions & 115 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4862,9 +4862,7 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode)
void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
assert(treeNode->OperIs(GT_PUTARG_STK));
GenTree* source = treeNode->gtOp1;
var_types targetType = genActualType(source->TypeGet());
emitter* emit = GetEmitter();
emitter* emit = GetEmitter();

// This is the varNum for our store operations,
// typically this is the varNum for the Outgoing arg space
Expand All @@ -4891,7 +4889,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
// Since it is a fast tail call, the existence of first incoming arg is guaranteed
// because fast tail call requires that in-coming arg area of caller is >= out-going
// arg area required for tail call.
LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
LclVarDsc* varDsc = compiler->lvaGetDesc(varNumOut);
assert(varDsc != nullptr);
#endif // FEATURE_FASTTAILCALL
}
Expand All @@ -4901,39 +4899,40 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
}

bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_FIELD_LIST);
clamp03 marked this conversation as resolved.
Show resolved Hide resolved
GenTree* source = treeNode->gtGetOp1();

if (!isStruct) // a normal non-Struct argument
if (!source->TypeIs(TYP_STRUCT)) // a normal non-Struct argument
{
if (varTypeIsSIMD(targetType))
if (varTypeIsSIMD(source->TypeGet()))
{
NYI_RISCV64("SIMD in genPutArgStk-----unimplemented/unused on RISCV64 yet----");
}

instruction storeIns = ins_Store(targetType);
emitAttr storeAttr = emitTypeSize(targetType);
var_types slotType = genActualType(source);
instruction storeIns = ins_Store(slotType);
emitAttr storeAttr = emitTypeSize(slotType);

// If it is contained then source must be the integer constant zero
if (source->isContained())
{
assert(source->OperGet() == GT_CNS_INT);
assert(source->AsIntConCommon()->IconValue() == 0);

emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut);
}
else
{
genConsumeReg(source);
if (storeIns == INS_sw)
{
emit->emitIns_R_R_R(INS_addw, EA_4BYTE, source->GetRegNum(), source->GetRegNum(), REG_R0);
clamp03 marked this conversation as resolved.
Show resolved Hide resolved
storeIns = INS_sd;
storeAttr = EA_8BYTE;
}

emit->emitIns_S_R(storeIns, storeAttr, REG_R0, varNumOut, argOffsetOut);
}
else
{
genConsumeReg(source);
emit->emitIns_S_R(storeIns, storeAttr, source->GetRegNum(), varNumOut, argOffsetOut);
}
argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing area
assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
}
else // We have some kind of a struct argument
{
Expand All @@ -4943,125 +4942,61 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
{
genPutArgStkFieldList(treeNode, varNumOut);
}
else // We must have a GT_BLK or a GT_LCL_VAR
else
{
noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_BLK));
noway_assert(source->OperIsLocalRead() || source->OperIs(GT_BLK));

var_types targetType = source->TypeGet();
noway_assert(varTypeIsStruct(targetType));

// Setup loReg from the internal registers that we reserved in lower.
//
regNumber loReg = treeNode->ExtractTempReg();
regNumber addrReg = REG_NA;
regNumber loReg = treeNode->ExtractTempReg();

GenTreeLclVarCommon* varNode = nullptr;
GenTree* addrNode = nullptr;
GenTreeLclVarCommon* srcLclNode = nullptr;
regNumber addrReg = REG_NA;
ClassLayout* layout = nullptr;

if (source->OperGet() == GT_LCL_VAR)
{
varNode = source->AsLclVarCommon();
}
else // we must have a GT_BLK
// Setup "layout", "srcLclNode" and "addrReg".
if (source->OperIsLocalRead())
{
assert(source->OperGet() == GT_BLK);
srcLclNode = source->AsLclVarCommon();
layout = srcLclNode->GetLayout(compiler);
LclVarDsc* varDsc = compiler->lvaGetDesc(srcLclNode);

addrNode = source->AsOp()->gtOp1;

// addrNode can either be a GT_LCL_ADDR<0> or an address expression
//
if (addrNode->IsLclVarAddr())
{
// We have a GT_BLK(GT_LCL_ADDR<0>)
//
// We will treat this case the same as above
// (i.e if we just had this GT_LCL_VAR directly as the source)
// so update 'source' to point this GT_LCL_ADDR node
// and continue to the codegen for the LCL_VAR node below
//
varNode = addrNode->AsLclVarCommon();
addrNode = nullptr;
}
else // addrNode is used
{
// Generate code to load the address that we need into a register
genConsumeAddress(addrNode);
addrReg = addrNode->GetRegNum();
}
}

// Either varNode or addrNOde must have been setup above,
// the xor ensures that only one of the two is setup, not both
assert((varNode != nullptr) ^ (addrNode != nullptr));

ClassLayout* layout;

// unsigned gcPtrCount; // The count of GC pointers in the struct
unsigned srcSize;

// gcPtrCount = treeNode->gtNumSlots;
// Setup the srcSize and layout
if (source->OperGet() == GT_LCL_VAR)
{
assert(varNode != nullptr);
LclVarDsc* varDsc = compiler->lvaGetDesc(varNode);

// This struct also must live in the stack frame
// And it can't live in a register (SIMD)
assert(varDsc->lvType == TYP_STRUCT);
// This struct must live on the stack frame.
assert(varDsc->lvOnFrame && !varDsc->lvRegister);

srcSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
// as that is how much stack is allocated for this LclVar
layout = varDsc->GetLayout();
}
else // we must have a GT_BLK
{
assert(source->OperGet() == GT_BLK);

// If the source is an BLK node then we need to use the type information
// it provides (size and GC layout) even if the node wraps a lclvar. Due
// to struct reinterpretation (e.g. Unsafe.As<X, Y>) it is possible that
// the BLK node has a different type than the lclvar.
layout = source->AsBlk()->GetLayout();
srcSize = layout->GetSize();
addrReg = genConsumeReg(source->AsBlk()->Addr());
}

unsigned structSize;
unsigned srcSize = layout->GetSize();

noway_assert(srcSize <= MAX_PASS_MULTIREG_BYTES);

unsigned dstSize = treeNode->GetStackByteSize();
if (dstSize != srcSize)
{
// We can generate a smaller code if store size is a multiple of TARGET_POINTER_SIZE.
// The dst size can be rounded up to PUTARG_STK size.
// The src size can be rounded up if it reads a local variable slot because the local
// variable stack allocation size is rounded up to be a multiple of the TARGET_POINTER_SIZE.
// The exception is arm64 apple arguments because they can be passed without padding.
if (varNode != nullptr)

// We can generate smaller code if store size is a multiple of TARGET_POINTER_SIZE.
// The dst size can be rounded up to PUTARG_STK size. The src size can be rounded up
// if it reads a local variable because reading "too much" from a local cannot fault.
//
if ((dstSize != srcSize) && (srcLclNode != nullptr))
{
unsigned widenedSrcSize = roundUp(srcSize, TARGET_POINTER_SIZE);
if (widenedSrcSize <= dstSize)
{
// If we have a varNode, even if it was casted using `OBJ`, we can read its original memory size.
const LclVarDsc* varDsc = compiler->lvaGetDesc(varNode);
const unsigned varStackSize = varDsc->lvSize();
if (varStackSize >= srcSize)
{
srcSize = varStackSize;
}
srcSize = widenedSrcSize;
}
}
if (dstSize == srcSize)
{
structSize = dstSize;
}
else
{
// With Unsafe object wwe can have different strange combinations:
// PutArgStk<8>(Obj<16>(LclVar<8>)) -> copy 8 bytes;
// PutArgStk<16>(Obj<16>(LclVar<8>)) -> copy 16 bytes, reading undefined memory after the local.
structSize = min(dstSize, srcSize);
}

int remainingSize = structSize;
assert(srcSize <= dstSize);

int remainingSize = srcSize;
unsigned structOffset = 0;
unsigned lclOffset = (srcLclNode != nullptr) ? srcLclNode->GetLclOffs() : 0;
unsigned nextIndex = 0;

while (remainingSize > 0)
Expand Down Expand Up @@ -5093,17 +5028,16 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
}
}

const emitAttr attr = emitTypeSize(type);
const emitAttr attr = emitActualTypeSize(type);
const unsigned moveSize = genTypeSize(type);
assert(EA_SIZE_IN_BYTES(attr) == moveSize);

remainingSize -= moveSize;

instruction loadIns = ins_Load(type);
if (varNode != nullptr)
if (srcLclNode != nullptr)
{
// Load from our varNumImp source
emit->emitIns_R_S(loadIns, attr, loReg, varNode->GetLclNum(), structOffset);
// Load from our local source
emit->emitIns_R_S(loadIns, attr, loReg, srcLclNode->GetLclNum(), lclOffset + structOffset);
}
else
{
Expand Down
38 changes: 5 additions & 33 deletions src/coreclr/jit/lowerriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,42 +393,14 @@ void Lowering::LowerPutArgStkOrSplit(GenTreePutArgStk* putArgNode)

if (src->TypeIs(TYP_STRUCT))
{
// STRUCT args (FIELD_LIST / OBJ) will always be contained.
// STRUCT args (FIELD_LIST / BLK / LCL_VAR / LCL_FLD) will always be contained.
MakeSrcContained(putArgNode, src);

// Currently, codegen does not support LCL_VAR/LCL_FLD sources, so we morph them to OBJs.
// TODO-ADDR: support the local nodes in codegen and remove this code.
if (src->OperIsLocalRead())
{
unsigned lclNum = src->AsLclVarCommon()->GetLclNum();
ClassLayout* layout = nullptr;
GenTree* lclAddr = nullptr;

if (src->OperIs(GT_LCL_VAR))
{
layout = comp->lvaGetDesc(lclNum)->GetLayout();
lclAddr = comp->gtNewLclVarAddrNode(lclNum);

comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::IsStructArg));
}
else
{
layout = src->AsLclFld()->GetLayout();
lclAddr = comp->gtNewLclAddrNode(lclNum, src->AsLclFld()->GetLclOffs());
}

src->ChangeOper(GT_BLK);
src->AsBlk()->SetAddr(lclAddr);
src->AsBlk()->Initialize(layout);

BlockRange().InsertBefore(src, lclAddr);
}

// Codegen supports containment of local addresses under OBJs.
if (src->OperIs(GT_BLK) && src->AsBlk()->Addr()->IsLclVarAddr())
if (src->OperIs(GT_LCL_VAR))
{
// TODO-RISCV64-CQ: support containment of LCL_ADDR with non-zero offset too.
MakeSrcContained(src, src->AsBlk()->Addr());
// TODO-1stClassStructs: support struct enregistration here by retyping "src" to its register type for
// the non-split case.
comp->lvaSetVarDoNotEnregister(src->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg));
}
}
}
Expand Down
37 changes: 12 additions & 25 deletions src/coreclr/jit/lsrariscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -934,20 +934,20 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
{
assert(argNode->gtOper == GT_PUTARG_STK);

GenTree* putArgChild = argNode->gtGetOp1();
GenTree* src = argNode->gtGetOp1();

int srcCount = 0;

// Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
if (putArgChild->TypeIs(TYP_STRUCT) || putArgChild->OperIs(GT_FIELD_LIST))
if (src->TypeIs(TYP_STRUCT))
{
// We will use store instructions that each write a register sized value

if (putArgChild->OperIs(GT_FIELD_LIST))
if (src->OperIs(GT_FIELD_LIST))
{
assert(putArgChild->isContained());
assert(src->isContained());
// We consume all of the items in the GT_FIELD_LIST
for (GenTreeFieldList::Use& use : putArgChild->AsFieldList()->Uses())
for (GenTreeFieldList::Use& use : src->AsFieldList()->Uses())
{
BuildUse(use.GetNode());
srcCount++;
Expand All @@ -959,36 +959,23 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
buildInternalIntRegisterDefForNode(argNode);
buildInternalIntRegisterDefForNode(argNode);

if (putArgChild->OperGet() == GT_BLK)
assert(src->isContained());

if (src->OperGet() == GT_BLK)
{
assert(putArgChild->isContained());
GenTree* objChild = putArgChild->gtGetOp1();
if (objChild->OperGet() == GT_LCL_ADDR)
{
// We will generate all of the code for the GT_PUTARG_STK, the GT_BLK and the GT_LCL_ADDR
// as one contained operation, and there are no source registers.
//
assert(objChild->isContained());
}
else
{
// We will generate all of the code for the GT_PUTARG_STK and its child node
// as one contained operation
//
srcCount = BuildOperandUses(objChild);
}
srcCount = BuildOperandUses(src->AsBlk()->Addr());
}
else
{
// No source registers.
putArgChild->OperIs(GT_LCL_VAR);
assert(src->OperIs(GT_LCL_VAR, GT_LCL_FLD));
}
}
}
else
{
assert(!putArgChild->isContained());
srcCount = BuildOperandUses(putArgChild);
assert(!src->isContained());
srcCount = BuildOperandUses(src);
}
buildInternalRegisterUses();
return srcCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct StructWithStructField
public Struct16bytes structField;
}

public class DevDiv_714266
public class DevDiv_718151
{
[MethodImpl(MethodImplOptions.NoInlining)]
int foo(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int a9, int a10, Struct16bytes s)
Expand All @@ -38,7 +38,7 @@ public static int TestEntryPoint()
StructWithStructField s = new StructWithStructField();
s.structField.a = 100;

DevDiv_714266 test = new DevDiv_714266();
DevDiv_718151 test = new DevDiv_718151();
return test.foo(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, s.structField);
}

Expand Down