Skip to content

Commit

Permalink
[MERGE #3681 @MikeHolman] add inlining support for asm.js/wasm
Browse files Browse the repository at this point in the history
Merge pull request #3681 from MikeHolman:wasminline

Add support for profiling in asm.js, and profile direct internal calls.

Use temp registers rather than hard-coded registers for intermediate bytecode values, so the backend correctly understands that these are temps and not locals. (This also allowed me to remove the I_Conv_* opcodes.)

Tell the inliner to try to inline candidate asm.js/wasm functions. This required some special handling for functions with no parameters and for non-var arguments, and skipping the function object bailout check.

Simplify how we emit calls in asm.js bytecode generator to evaluate arguments before emitting argouts. Prior code required complicated tracking of call depth during bytecode gen, which led to complicated call nesting (and subsequently made inlining more complicated).

Track calls for asm.js/wasm during globopt, and change inline argument tracking to use inlinee argout size rather than count for frame size calculation. (This will be needed for stackwalking, which we punted for asm.js in general... but is something we should really get around to.)

For inlining heuristics, we are more aggressive at inlining than for normal JS, because the bytecode is already low level, so inlining it will blow up in size less than JS bytecode will.

Preliminary perf indicates overall improvements of about 4.5% in Unity and 1% in Jetstream.

Closes #2794

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/microsoft/chakracore/3681)
<!-- Reviewable:end -->
  • Loading branch information
MikeHolman committed Sep 14, 2017
2 parents 2a4c3e2 + 75f5e8f commit f57d105
Show file tree
Hide file tree
Showing 78 changed files with 1,271 additions and 1,114 deletions.
19 changes: 10 additions & 9 deletions lib/Backend/BackendOpCodeAttrAsmJs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,13 @@
#ifdef ASMJS_PLAT
namespace OpCodeAttrAsmJs
{
// OpSideEffect:
// Opcode has side effect not just to the dst/src on the instruction.
// The opcode cannot be deadstored. (e.g. StFld, LdFld from DOM, call valueOf/toString/getter/setter)
// Doesn't include all "exit" script (e.g. LdThis doesn't have side effect for HostDispatch for exiting script to getting the name space parent)
// OpHasImplicitCall:
// Include all possible exit scripts, call valueOf/toString/getter/setter
// OpSerialized:
// Op is a serialized (indirected) variant of another op code
// Attribute flags for asm.js opcodes. Every non-zero enumerator occupies a
// distinct bit, so a single attribute value can carry several of them at once.
enum OpCodeAttrEnum
{
    None                 = 0x0,
    OpNoFallThrough      = 0x1, // bit 0: opcode doesn't fall through in flow; it always jumps or returns
    OpHasMultiSizeLayout = 0x2, // bit 1
    OpHasProfiled        = 0x4, // bit 2
    OpProfiled           = 0x8  // bit 3
};

static const int OpcodeAttributesAsmJs[] =
Expand Down Expand Up @@ -65,6 +58,14 @@ namespace OpCodeAttrAsmJs
return CheckHasFlag( OpHasMultiSizeLayout );
}

// Reports whether the attribute value looked up for `opcode` has the
// OpHasProfiled bit set.
bool HasProfiledOp(Js::OpCodeAsmJs opcode)
{
    const auto attributes = GetOpCodeAttributes(opcode);
    if (attributes & OpHasProfiled)
    {
        return true;
    }
    return false;
}
// Reports whether the attribute value looked up for `opcode` has the
// OpProfiled bit set.
bool IsProfiledOp(Js::OpCodeAsmJs opcode)
{
    const auto attributes = GetOpCodeAttributes(opcode);
    if (attributes & OpProfiled)
    {
        return true;
    }
    return false;
}

}; // OpCodeAttrAsmJs
#endif
2 changes: 2 additions & 0 deletions lib/Backend/BackendOpCodeAttrAsmJs.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@ namespace OpCodeAttrAsmJs
bool HasFallThrough(Js::OpCodeAsmJs opcode);
// True if the opcode has a small/large layout
bool HasMultiSizeLayout(Js::OpCodeAsmJs opcode);
bool HasProfiledOp(Js::OpCodeAsmJs opcode);
bool IsProfiledOp(Js::OpCodeAsmJs opcode);
};
#endif
39 changes: 24 additions & 15 deletions lib/Backend/Func.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Func::Func(JitArenaAllocator *alloc, JITTimeWorkItem * workItem,
m_canDoInlineArgsOpt(true),
m_doFastPaths(false),
hasBailout(false),
firstIRTemp(0),
hasBailoutInEHRegion(false),
hasInstrNumber(false),
maintainByteCodeOffset(true),
Expand All @@ -78,7 +79,7 @@ Func::Func(JitArenaAllocator *alloc, JITTimeWorkItem * workItem,
hasAnyStackNestedFunc(false),
hasMarkTempObjects(false),
postCallByteCodeOffset(postCallByteCodeOffset),
maxInlineeArgOutCount(0),
maxInlineeArgOutSize(0),
returnValueRegSlot(returnValueRegSlot),
firstActualStackOffset(-1),
m_localVarSlotsOffset(Js::Constants::InvalidOffset),
Expand Down Expand Up @@ -744,19 +745,27 @@ void Func::SetFirstArgOffset(IR::Instr* inlineeStart)
int32 lastOffset;

IR::Instr* arg = inlineeStart->GetNextArg();
const auto lastArgOutStackSym = arg->GetDst()->AsSymOpnd()->m_sym->AsStackSym();
lastOffset = lastArgOutStackSym->m_offset;
Assert(lastArgOutStackSym->m_isSingleDef);
const auto secondLastArgOutOpnd = lastArgOutStackSym->m_instrDef->GetSrc2();
if (secondLastArgOutOpnd->IsSymOpnd())
{
const auto secondLastOffset = secondLastArgOutOpnd->AsSymOpnd()->m_sym->AsStackSym()->m_offset;
if (secondLastOffset > lastOffset)
if (arg)
{
const auto lastArgOutStackSym = arg->GetDst()->AsSymOpnd()->m_sym->AsStackSym();
lastOffset = lastArgOutStackSym->m_offset;
Assert(lastArgOutStackSym->m_isSingleDef);
const auto secondLastArgOutOpnd = lastArgOutStackSym->m_instrDef->GetSrc2();
if (secondLastArgOutOpnd->IsSymOpnd())
{
lastOffset = secondLastOffset;
const auto secondLastOffset = secondLastArgOutOpnd->AsSymOpnd()->m_sym->AsStackSym()->m_offset;
if (secondLastOffset > lastOffset)
{
lastOffset = secondLastOffset;
}
}
lastOffset += MachPtr;
}
else
{
Assert(this->GetTopFunc()->GetJITFunctionBody()->IsAsmJsMode());
lastOffset = MachPtr;
}
lastOffset += MachPtr;
int32 firstActualStackOffset = lastOffset - ((this->actualCount + Js::Constants::InlineeMetaArgCount) * MachPtr);
Assert((this->firstActualStackOffset == -1) || (this->firstActualStackOffset == firstActualStackOffset));
this->firstActualStackOffset = firstActualStackOffset;
Expand Down Expand Up @@ -918,7 +927,7 @@ Int64RegPair Func::FindOrCreateInt64Pair(IR::Opnd* opnd)
{
Js::ArgSlot slotNumber = stackSym->GetArgSlotNum();
symPair.low = StackSym::NewArgSlotSym(slotNumber, this, pairType);
symPair.high = StackSym::NewArgSlotSym(slotNumber + 1, this, pairType);
symPair.high = StackSym::NewArgSlotSym(slotNumber, this, pairType);
}
else
{
Expand Down Expand Up @@ -1363,11 +1372,11 @@ Func::EnsureLoopParamSym()
}

void
Func::UpdateMaxInlineeArgOutCount(uint inlineeArgOutCount)
Func::UpdateMaxInlineeArgOutSize(uint inlineeArgOutSize)
{
if (maxInlineeArgOutCount < inlineeArgOutCount)
if (this->maxInlineeArgOutSize < inlineeArgOutSize)
{
maxInlineeArgOutCount = inlineeArgOutCount;
this->maxInlineeArgOutSize = inlineeArgOutSize;
}
}

Expand Down
13 changes: 7 additions & 6 deletions lib/Backend/Func.h
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,7 @@ static const unsigned __int64 c_debugFillPattern8 = 0xcececececececece;
uint32 inlineDepth;
uint32 postCallByteCodeOffset;
Js::RegSlot returnValueRegSlot;
Js::RegSlot firstIRTemp;
Js::ArgSlot actualCount;
int32 firstActualStackOffset;
uint32 tryCatchNestingLevel;
Expand Down Expand Up @@ -760,8 +761,8 @@ static const unsigned __int64 c_debugFillPattern8 = 0xcececececececece;
bool DoMaintainByteCodeOffset() const { return this->HasByteCodeOffset() && this->GetTopFunc()->maintainByteCodeOffset; }
void StopMaintainByteCodeOffset() { this->GetTopFunc()->maintainByteCodeOffset = false; }
Func * GetParentFunc() const { return parentFunc; }
uint GetMaxInlineeArgOutCount() const { return maxInlineeArgOutCount; }
void UpdateMaxInlineeArgOutCount(uint inlineeArgOutCount);
uint GetMaxInlineeArgOutSize() const { return this->maxInlineeArgOutSize; }
void UpdateMaxInlineeArgOutSize(uint inlineeArgOutSize);
#if DBG_DUMP
ptrdiff_t m_codeSize;
#endif
Expand Down Expand Up @@ -983,10 +984,10 @@ static const unsigned __int64 c_debugFillPattern8 = 0xcececececececece;
#if defined(_M_ARM32_OR_ARM64)
int32 GetInlineeArgumentStackSize()
{
int32 count = this->GetMaxInlineeArgOutCount();
if (count)
int32 size = this->GetMaxInlineeArgOutSize();
if (size)
{
return ((count + 1) * MachPtr); // +1 for the dedicated zero out argc slot
return size + MachPtr; // +1 for the dedicated zero out argc slot
}
return 0;
}
Expand Down Expand Up @@ -1014,7 +1015,7 @@ static const unsigned __int64 c_debugFillPattern8 = 0xcececececececece;
#endif
Func * const parentFunc;
StackSym * m_inlineeFrameStartSym;
uint maxInlineeArgOutCount;
uint maxInlineeArgOutSize;
const bool m_isBackgroundJIT;
bool hasInstrNumber;
bool maintainByteCodeOffset;
Expand Down
23 changes: 9 additions & 14 deletions lib/Backend/GlobOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1577,6 +1577,7 @@ GlobOpt::OptArguments(IR::Instr *instr)

if (instr->HasAnyLoadHeapArgsOpCode())
{
#ifdef ENABLE_DEBUG_CONFIG_OPTIONS
if (instr->m_func->IsStackArgsEnabled())
{
if (instr->GetSrc1()->IsRegOpnd() && instr->m_func->GetJITFunctionBody()->GetInParamsCount() > 1)
Expand All @@ -1593,6 +1594,7 @@ GlobOpt::OptArguments(IR::Instr *instr)
}
}
}
#endif

if (instr->m_func->GetJITFunctionBody()->GetInParamsCount() != 1 && !instr->m_func->IsStackArgsEnabled())
{
Expand Down Expand Up @@ -2632,11 +2634,8 @@ GlobOpt::OptInstr(IR::Instr *&instr, bool* isInstrRemoved)
}

// Track calls after any pre-op bailouts have been inserted before the call, because they will need to restore out params.
// We don't inline in asmjs and hence we don't need to track calls in asmjs too, skipping this step for asmjs.
if (!GetIsAsmJSFunc())
{
this->TrackCalls(instr);
}

this->TrackCalls(instr);

if (instr->GetSrc1())
{
Expand Down Expand Up @@ -6771,17 +6770,13 @@ GlobOpt::OptConstFoldBranch(IR::Instr *instr, Value *src1Val, Value*src2Val, Val
// this path would probably work outside of asm.js, but we should verify that if we ever hit this scenario
Assert(GetIsAsmJSFunc());
constVal = 0;
if (src1Val->GetValueInfo()->TryGetIntConstantValue(&constVal) && constVal != 0)
if (!src1Val->GetValueInfo()->TryGetIntConstantValue(&constVal))
{
instr->FreeSrc1();
if (instr->GetSrc2())
{
instr->FreeSrc2();
}
instr->m_opcode = Js::OpCode::Nop;
return true;
return false;
}
return false;

result = constVal == 0;
break;

default:
return false;
Expand Down
2 changes: 1 addition & 1 deletion lib/Backend/GlobOpt.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ class GlobOpt
IR::Instr * GetExtendedArg(IR::Instr *instr);
int GetBoundCheckOffsetForSimd(ValueType arrValueType, const IR::Instr *instr, const int oldOffset = -1);

IR::Instr * OptNewScObject(IR::Instr** instrPtr, Value* srcVal);
void OptNewScObject(IR::Instr** instrPtr, Value* srcVal);
template <typename T>
bool OptConstFoldBinaryWasm(IR::Instr * *pInstr, const Value* src1, const Value* src2, Value **pDstVal);
template <typename T>
Expand Down
32 changes: 21 additions & 11 deletions lib/Backend/GlobOptBailOut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ GlobOpt::CaptureArguments(BasicBlock *block, BailOutInfo * bailOutInfo, JitArena
void
GlobOpt::TrackByteCodeSymUsed(IR::Instr * instr, BVSparse<JitArenaAllocator> * instrByteCodeStackSymUsed, PropertySym **pPropertySym)
{
if(instr->m_func->GetJITFunctionBody()->IsAsmJsMode())
{
return;
}
IR::Opnd * src = instr->GetSrc1();
if (src)
{
Expand Down Expand Up @@ -427,7 +431,7 @@ GlobOpt::MarkNonByteCodeUsed(IR::Opnd * opnd)
void
GlobOpt::CaptureByteCodeSymUses(IR::Instr * instr)
{
if (this->byteCodeUses)
if (this->byteCodeUses || this->func->GetJITFunctionBody()->IsAsmJsMode())
{
// We already captured it before.
return;
Expand Down Expand Up @@ -493,7 +497,8 @@ GlobOpt::TrackCalls(IR::Instr * instr)
Assert(stackSym->IsArgSlotSym());
if (stackSym->m_isInlinedArgSlot)
{
this->currentBlock->globOptData.inlinedArgOutCount++;
uint size = TySize[instr->GetDst()->GetType()];
this->currentBlock->globOptData.inlinedArgOutSize += size < MachPtr ? MachPtr : size;
// We want to update the offsets only once: don't do in prepass.
if (!this->IsLoopPrePass() && stackSym->m_offset >= 0)
{
Expand Down Expand Up @@ -523,7 +528,7 @@ GlobOpt::TrackCalls(IR::Instr * instr)
this->currentBlock->globOptData.curFunc = instr->m_func;
this->currentBlock->globOptData.curFunc = instr->m_func;

this->func->UpdateMaxInlineeArgOutCount(this->currentBlock->globOptData.inlinedArgOutCount);
this->func->UpdateMaxInlineeArgOutSize(this->currentBlock->globOptData.inlinedArgOutSize);
this->EndTrackCall(instr);

if (DoInlineArgsOpt(instr->m_func))
Expand Down Expand Up @@ -560,8 +565,8 @@ GlobOpt::TrackCalls(IR::Instr * instr)
}
EndTrackingOfArgObjSymsForInlinee();

Assert(this->currentBlock->globOptData.inlinedArgOutCount >= instr->GetArgOutCount(/*getInterpreterArgOutCount*/ false));
this->currentBlock->globOptData.inlinedArgOutCount -= instr->GetArgOutCount(/*getInterpreterArgOutCount*/ false);
Assert(this->currentBlock->globOptData.inlinedArgOutSize >= instr->GetArgOutSize(/*getInterpreterArgOutCount*/ false));
this->currentBlock->globOptData.inlinedArgOutSize -= instr->GetArgOutSize(/*getInterpreterArgOutCount*/ false);
break;

case Js::OpCode::InlineeMetaArg:
Expand All @@ -578,7 +583,7 @@ GlobOpt::TrackCalls(IR::Instr * instr)
Func * currentFunc = instr->m_func->GetParentFunc();
stackSym->FixupStackOffset(currentFunc);
}
this->currentBlock->globOptData.inlinedArgOutCount++;
this->currentBlock->globOptData.inlinedArgOutSize += MachPtr;
break;
}

Expand Down Expand Up @@ -651,8 +656,8 @@ GlobOpt::TrackCalls(IR::Instr * instr)
this->EndTrackCall(instr);
}

Assert(this->currentBlock->globOptData.inlinedArgOutCount >= instr->GetArgOutCount(/*getInterpreterArgOutCount*/ false));
this->currentBlock->globOptData.inlinedArgOutCount -= instr->GetArgOutCount(/*getInterpreterArgOutCount*/ false);
Assert(this->currentBlock->globOptData.inlinedArgOutSize >= instr->GetArgOutSize(/*getInterpreterArgOutCount*/ false));
this->currentBlock->globOptData.inlinedArgOutSize -= instr->GetArgOutSize(/*getInterpreterArgOutCount*/ false);

this->inInlinedBuiltIn = false;
break;
Expand Down Expand Up @@ -735,8 +740,9 @@ void GlobOpt::RecordInlineeFrameInfo(IR::Instr* inlineeEnd)
{
frameInfoValue = InlineFrameInfoValue(argOpnd->GetConstValue());
}
else if (argSym->IsConst())
else if (argSym->IsConst() && !argSym->IsInt64Const())
{
// InlineFrameInfo doesn't currently support Int64Const
frameInfoValue = InlineFrameInfoValue(argSym->GetConstValueForBailout());
}
else
Expand Down Expand Up @@ -784,7 +790,7 @@ void GlobOpt::RecordInlineeFrameInfo(IR::Instr* inlineeEnd)
Assert(globOptData.liveVarSyms->Test(argSym->m_id));
}

if (argSym->IsConst())
if (argSym->IsConst() && !argSym->IsInt64Const())
{
frameInfoValue = InlineFrameInfoValue(argSym->GetConstValueForBailout());
}
Expand Down Expand Up @@ -1043,7 +1049,11 @@ IR::ByteCodeUsesInstr *
GlobOpt::InsertByteCodeUses(IR::Instr * instr, bool includeDef)
{
IR::ByteCodeUsesInstr * byteCodeUsesInstr = nullptr;
Assert(this->byteCodeUses);
if (!this->byteCodeUses)
{
Assert(this->isAsmJSFunc);
return nullptr;
}
IR::RegOpnd * dstOpnd = nullptr;
if (includeDef)
{
Expand Down
12 changes: 6 additions & 6 deletions lib/Backend/GlobOptBlockData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ GlobOptBlockData::NullOutBlockData(GlobOpt* globOpt, Func* func)
this->startCallCount = 0;
this->argOutCount = 0;
this->totalOutParamCount = 0;
this->inlinedArgOutCount = 0;
this->inlinedArgOutSize = 0;
this->hasCSECandidates = false;
this->curFunc = func;

Expand Down Expand Up @@ -84,7 +84,7 @@ GlobOptBlockData::InitBlockData(GlobOpt* globOpt, Func* func)
this->startCallCount = 0;
this->argOutCount = 0;
this->totalOutParamCount = 0;
this->inlinedArgOutCount = 0;
this->inlinedArgOutSize = 0;
this->hasCSECandidates = false;
this->curFunc = func;

Expand Down Expand Up @@ -137,7 +137,7 @@ GlobOptBlockData::ReuseBlockData(GlobOptBlockData *fromData)
this->startCallCount = fromData->startCallCount;
this->argOutCount = fromData->argOutCount;
this->totalOutParamCount = fromData->totalOutParamCount;
this->inlinedArgOutCount = fromData->inlinedArgOutCount;
this->inlinedArgOutSize = fromData->inlinedArgOutSize;
this->hasCSECandidates = fromData->hasCSECandidates;

this->stackLiteralInitFldDataMap = fromData->stackLiteralInitFldDataMap;
Expand Down Expand Up @@ -180,7 +180,7 @@ GlobOptBlockData::CopyBlockData(GlobOptBlockData *fromData)
this->startCallCount = fromData->startCallCount;
this->argOutCount = fromData->argOutCount;
this->totalOutParamCount = fromData->totalOutParamCount;
this->inlinedArgOutCount = fromData->inlinedArgOutCount;
this->inlinedArgOutSize = fromData->inlinedArgOutSize;
this->hasCSECandidates = fromData->hasCSECandidates;

this->changedSyms = fromData->changedSyms;
Expand Down Expand Up @@ -348,7 +348,7 @@ void GlobOptBlockData::CloneBlockData(BasicBlock *const toBlockContext, BasicBlo
this->startCallCount = fromData->startCallCount;
this->argOutCount = fromData->argOutCount;
this->totalOutParamCount = fromData->totalOutParamCount;
this->inlinedArgOutCount = fromData->inlinedArgOutCount;
this->inlinedArgOutSize = fromData->inlinedArgOutSize;
this->hasCSECandidates = fromData->hasCSECandidates;

// Although we don't need the data on loop pre pass, we need to do it for the loop header
Expand Down Expand Up @@ -835,7 +835,7 @@ GlobOptBlockData::MergeBlockData(
Assert(this->startCallCount == fromData->startCallCount);
Assert(this->argOutCount == fromData->argOutCount);
Assert(this->totalOutParamCount == fromData->totalOutParamCount);
Assert(this->inlinedArgOutCount == fromData->inlinedArgOutCount);
Assert(this->inlinedArgOutSize == fromData->inlinedArgOutSize);

// stackLiteralInitFldDataMap is a union of the stack literal from two path.
// Although we don't need the data on loop prepass, we need to do it for the loop header
Expand Down
2 changes: 1 addition & 1 deletion lib/Backend/GlobOptBlockData.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ class GlobOptBlockData
CapturedValues * capturedValues;
BVSparse<JitArenaAllocator> * changedSyms;

uint inlinedArgOutCount;
uint inlinedArgOutSize;

bool hasCSECandidates;

Expand Down
Loading

0 comments on commit f57d105

Please sign in to comment.