diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 7e6ef388a8fca8..32c89100205049 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -376,15 +376,17 @@ When an exception occurs, the VM is invoked to do some processing. If the except The VM sets the frame register to be the same as the parent function. This allows the funclets to access local variables using frame-relative addresses. -For filter funclets and on CoreCLR/AMD64 for all funclets, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. +For filter funclets, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. -For other funclets on all platforms except CoreCLR/AMD64, all non-volatile registers are restored to their values at the exception point. The JIT codegen [does not take advantage of it currently](https://github.com/dotnet/runtime/pull/114630#issuecomment-2810210759). +For other funclets, all non-volatile registers are restored to their values at the exception point. The JIT codegen [does not take advantage of it currently](https://github.com/dotnet/runtime/pull/114630#issuecomment-2810210759). ### Registers on return from a funclet When a funclet finishes execution, and the VM returns execution to the function (or an enclosing funclet, if there is EH clause nesting), the non-volatile registers are restored to the values they held at the exception point. Note that the volatile registers have been trashed. -Any register value changes made in the funclet are lost. 
If a funclet wants to make a variable change known to the main function (or the funclet that contains the "try" region), that variable change needs to be made to the shared main function stack frame. +Any register value changes made in the funclet are lost. If a funclet wants to make a variable change known to the main function (or the funclet that contains the "try" region), that variable change needs to be made to the shared main function stack frame. This is not a fundamental limitation. If necessary, the runtime can be updated to preserve non-volatile register changes made in funclets. + +Funclets are not required to preserve non-volatile registers. ## Windows/x86 EH considerations diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index db3480acadc98a..cd481e46fed26d 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* 63dcb8b8-1f9d-43d8-bb09-bf5d8bf85ad4 */ - 0x63dcb8b8, - 0x1f9d, - 0x43d8, - {0xbb, 0x09, 0xbf, 0x5d, 0x8b, 0xf8, 0x5a, 0xd4} +constexpr GUID JITEEVersionIdentifier = { /* 26d0dde8-bc9d-4543-9b9a-57ad8b1acdc0 */ + 0x26d0dde8, + 0xbc9d, + 0x4543, + {0x9b, 0x9a, 0x57, 0xad, 0x8b, 0x1a, 0xcd, 0xc0} }; #endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 69390fe6c340a6..1df741994254ce 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10859,24 +10859,15 @@ void CodeGen::genFnEpilog(BasicBlock* block) * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) * finally/fault: none * - * First parameter (rcx/rdi) is a placeholder for establisher frame which is no longer used. 
- * * The AMD64 funclet prolog sequence is: * - * push ebp - * push callee-saved regs - * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use - * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for - * ; the entire function. - * sub sp, XXX ; Establish the rest of the frame. + * sub sp, XXX ; Establish the frame. * ; XXX is determined by lvaOutgoingArgSpaceSize, aligned up to preserve stack alignment. * ; If we push an odd number of registers, we also generate this, to keep the stack aligned. * * The epilog sequence is then: * * add rsp, XXX - * pop callee-saved regs ; if necessary - * pop rbp * ret * * The funclet frame is thus: @@ -10888,10 +10879,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) * +=======================+ <---- Caller's SP * | Return address | * |-----------------------| - * | Saved EBP | - * |-----------------------| - * |Callee saved registers | - * |-----------------------| * ~ possible 8 byte pad ~ * ~ for alignment ~ * |-----------------------| @@ -10902,10 +10889,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) * | | downward | * V * - * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this - * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64 - * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h - * "FRAMEPTR OFFSETS" for details. */ void CodeGen::genFuncletProlog(BasicBlock* block) @@ -10928,19 +10911,11 @@ void CodeGen::genFuncletProlog(BasicBlock* block) compiler->unwindBegProlog(); - // We need to push ebp, since it's callee-saved. - // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't - // keep track of that on a per-funclet basis, so we push the same set as in the main function. + // We do not need to push callee-saved registers. 
The runtime takes care of preserving them. // We do not need to allocate fixed-size frame, since nothing else // is stored here (all temps are allocated in the parent frame). // We do need to allocate the outgoing argument space, in case there are calls here. - inst_RV(INS_push, REG_FPBASE, TYP_REF); - compiler->unwindPush(REG_FPBASE); - - // Callee saved int registers are pushed to stack. - genPushCalleeSavedRegisters(); - regMaskTP maskArgRegsLiveIn; if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT)) { @@ -10951,14 +10926,8 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2; } - regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed - bool initRegZeroed = false; - - genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); - - // Callee saved float registers are copied to stack in their assigned stack slots - // after allocating space for them as part of funclet frame. - genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); + bool initRegZeroed = false; + genAllocLclFrame(genFuncletInfo.fiSpDelta, REG_NA, &initRegZeroed, maskArgRegsLiveIn); // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); @@ -10982,12 +10951,7 @@ void CodeGen::genFuncletEpilog() ScopedSetVariable _setGeneratingEpilog(&compiler->compGeneratingEpilog, true); - // Restore callee saved XMM regs from their stack slots before modifying SP - // to position at callee saved int regs. 
- genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta); inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE); - genPopCalleeSavedRegisters(); - inst_RV(INS_pop, REG_EBP, TYP_I_IMPL); instGen_Return(0); } @@ -11009,7 +10973,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert(isFramePointerUsed()); assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be // finalized - assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); #ifndef UNIX_AMD64_ABI @@ -11023,29 +10986,12 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // How much stack do we allocate in the funclet? // We need to 16-byte align the stack. - unsigned totalFrameSize = - REGSIZE_BYTES // return address - + REGSIZE_BYTES // pushed EBP - + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP - - // Entire 128-bits of XMM register is saved to stack due to ABI encoding requirement. - // Copying entire XMM register to/from memory will be performant if SP is aligned at XMM_REGSIZE_BYTES boundary. - unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES; - unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? 
AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0; - - totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs - + calleeFPRegsSavedSize // pushed callee-saved float regs - // below calculated 'pad' will go here - + compiler->lvaOutgoingArgSpaceSize // outgoing arg space - ; + unsigned totalFrameSize = REGSIZE_BYTES // return address + + compiler->lvaOutgoingArgSpaceSize; unsigned pad = AlignmentPad(totalFrameSize, 16); - genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary - + calleeFPRegsSavedSize // Callee saved xmm regs - + pad // padding - + compiler->lvaOutgoingArgSpaceSize // outgoing arg space - ; + genFuncletInfo.fiSpDelta = pad + compiler->lvaOutgoingArgSpaceSize; #ifdef DEBUG if (verbose) diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S index cffacee7b358c0..9df1f73b3ab410 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -245,6 +245,9 @@ NESTED_END RhpRethrow, _TEXT alloc_stack stack_alloc_size + // Mirror clearing of AVX state done by regular method prologs + vzeroupper + END_PROLOGUE .endm @@ -252,6 +255,9 @@ NESTED_END RhpRethrow, _TEXT // Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) // .macro FUNCLET_CALL_EPILOGUE + // Mirror clearing of AVX state done by regular method epilogs + vzeroupper + free_stack stack_alloc_size pop_nonvol_reg rbp @@ -313,23 +319,6 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 0xbaaddeed - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov 
[rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - mov rdi, [rsp + locArg0] // rsi <- exception object call qword ptr [rsp + locArg1] // call handler funclet @@ -452,42 +441,10 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 0xbaaddeed - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - call qword ptr [rsp + locArg0] // handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 - mov rsi, [rsp + locArg1] // rsi <- regdisplay - - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbx] - mov [rax] , rbx - mov rax, [rsi + OFFSETOF__REGDISPLAY__pRbp] - mov [rax] , rbp - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR12] - mov [rax] , r12 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR13] - mov [rax] , r13 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR14] - mov [rax] , r14 - mov rax, [rsi + OFFSETOF__REGDISPLAY__pR15] - mov [rax] , r15 - mov rax, [rsp + locThread] // rax <- Thread* lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc @@ -585,23 +542,6 @@ NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -#if 0 // _DEBUG // @TODO: temporarily removed because trashing RBP breaks the debugger - // trash the values at the old homes to make sure nobody uses them - mov rcx, 
0xbaaddeed - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], rcx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax], rcx -#endif - #ifdef _DEBUG // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we // have to spill all the preserved registers and then refill them after the call. diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 4d225ba46054d0..b8f0582b4f0456 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -308,6 +308,9 @@ FUNCLET_CALL_PROLOGUE macro localsCount, alignStack alloc_stack stack_alloc_size + ;; Mirror clearing of AVX state done by regular method prologs + vzeroupper + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) @@ -326,6 +329,9 @@ endm ;; Epilogue of all funclet calling helpers (RhpCallXXXXFunclet) ;; FUNCLET_CALL_EPILOGUE macro + ;; Mirror clearing of AVX state done by regular method epilogs + vzeroupper + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] @@ -401,27 +407,6 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] mov r15, [rax] -if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger - ;; trash the values at the old homes to make sure nobody uses them - mov r9, 0baaddeedh - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbx] - mov 
[rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRsi] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pRdi] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR12] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR13] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR14] - mov [rax], r9 - mov rax, [r8 + OFFSETOF__REGDISPLAY__pR15] - mov [rax], r9 -endif - movdqa xmm6, [r8 + OFFSETOF__REGDISPLAY__Xmm + 0*10h] movdqa xmm7, [r8 + OFFSETOF__REGDISPLAY__Xmm + 1*10h] movdqa xmm8, [r8 + OFFSETOF__REGDISPLAY__Xmm + 2*10h] @@ -617,62 +602,10 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT movdqa xmm14,[rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h] -if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the debugger - ;; trash the values at the old homes to make sure nobody uses them - mov r9, 0baaddeedh - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR13] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax], r9 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax], r9 -endif - call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 - mov rdx, [rsp + rsp_offsetof_arguments + 8h] ;; rdx <- regdisplay - - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbx] - mov [rax] , rbx - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] - mov [rax] , rbp - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRsi] - mov [rax] , rsi - mov rax, [rdx + OFFSETOF__REGDISPLAY__pRdi] - mov [rax] , rdi - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR12] - mov [rax] , r12 - mov rax, [rdx + 
OFFSETOF__REGDISPLAY__pR13] - mov [rax] , r13 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR14] - mov [rax] , r14 - mov rax, [rdx + OFFSETOF__REGDISPLAY__pR15] - mov [rax] , r15 - - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 0*10h], xmm6 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 1*10h], xmm7 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 2*10h], xmm8 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 3*10h], xmm9 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 4*10h], xmm10 - - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 5*10h], xmm11 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 6*10h], xmm12 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 7*10h], xmm13 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 8*10h], xmm14 - movdqa [rdx + OFFSETOF__REGDISPLAY__Xmm + 9*10h], xmm15 - mov rax, [rsp + rsp_offsetof_thread] ;; rax <- Thread* lock or dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index cb1156e38b3264..b80a2e51c69809 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -509,6 +509,60 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT +;; +;; Prologue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_PROLOGUE macro localsCount, alignStack + PUSH_CALLEE_SAVED_REGISTERS + + arguments_scratch_area_size = 20h + xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area + stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size + rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h + rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size + + alloc_stack stack_alloc_size + + ;; Mirror clearing of AVX state done by regular method prologs + vzeroupper + + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) + save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) + save_xmm128_postrsp 
xmm8, (arguments_scratch_area_size + 2 * 10h) + save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) + save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) + save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) + save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) + save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) + save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) + save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) + + END_PROLOGUE +endm + +;; +;; Epilogue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + ;; Mirror clearing of AVX state done by regular method epilogs + vzeroupper + + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] + movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] + movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] + movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] + movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] + movdqa xmm11, [rsp + arguments_scratch_area_size + 5 * 10h] + movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] + movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] + movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] + movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] + + add rsp, stack_alloc_size + + POP_CALLEE_SAVED_REGISTERS +endm + ; This helper enables us to call into a funclet after restoring Fp register NESTED_ENTRY CallEHFunclet, _TEXT ; On entry: @@ -519,19 +573,35 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. ; - push_nonvol_reg rbp - alloc_stack 20h ; argument scratch space for the call - END_PROLOGUE + FUNCLET_CALL_PROLOGUE 0, 1 - ; Restore RBP + ; Restore RBX, RBP, RSI, RDI, R12, R13, R14, R15 from CONTEXT mov rbp, [r8 + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] - ; Save the SP of this function. 
+ mov rsi, [r8 + OFFSETOF__CONTEXT__Rsi - OFFSETOF__CONTEXT__Rbx] + mov rdi, [r8 + OFFSETOF__CONTEXT__Rdi - OFFSETOF__CONTEXT__Rbx] + mov r12, [r8 + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [r8 + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [r8 + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [r8 + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] + + ; Restore XMM registers from CONTEXT + movdqa xmm6, [r8 + OFFSETOF__CONTEXT__Xmm6 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm7, [r8 + OFFSETOF__CONTEXT__Xmm7 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm8, [r8 + OFFSETOF__CONTEXT__Xmm8 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm9, [r8 + OFFSETOF__CONTEXT__Xmm9 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm10, [r8 + OFFSETOF__CONTEXT__Xmm10 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm11, [r8 + OFFSETOF__CONTEXT__Xmm11 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm12, [r8 + OFFSETOF__CONTEXT__Xmm12 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm13, [r8 + OFFSETOF__CONTEXT__Xmm13 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm14, [r8 + OFFSETOF__CONTEXT__Xmm14 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm15, [r8 + OFFSETOF__CONTEXT__Xmm15 - OFFSETOF__CONTEXT__Rbx] + + ; Save the SP of this function. mov [r9], rsp ; Invoke the funclet call rdx - add rsp, 20h - pop rbp + FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFunclet, _TEXT @@ -546,9 +616,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
; - push_nonvol_reg rbp - alloc_stack 20h ; argument scratch space for the call - END_PROLOGUE + FUNCLET_CALL_PROLOGUE 0, 1 ; Save the SP of this function mov [r9], rsp @@ -557,9 +625,8 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; Invoke the filter funclet call r8 - add rsp, 20h - pop rbp + FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFilterFunclet, _TEXT - end \ No newline at end of file + end diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 08a2db5cae34ac..c90c535a493840 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -363,6 +363,44 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT +// +// Prologue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack + push_nonvol_reg r15 // save preserved regs for OS stackwalker + push_nonvol_reg r14 // ... + push_nonvol_reg r13 // ... + push_nonvol_reg r12 // ... + push_nonvol_reg rbx // ... + push_nonvol_reg rbp // ... + + stack_alloc_size = \localsCount * 8 + \alignStack * 8 + + alloc_stack stack_alloc_size + + // Mirror clearing of AVX state done by regular method prologs + vzeroupper + + END_PROLOGUE +.endm + +// +// Epilogue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_EPILOGUE + // Mirror clearing of AVX state done by regular method epilogs + vzeroupper + + free_stack stack_alloc_size + + pop_nonvol_reg rbp + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 +.endm + // This helper enables us to call into a funclet after restoring Fp register NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // On entry: @@ -373,16 +411,21 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// - push_nonvol_reg rbp + FUNCLET_CALL_PROLOGUE 0, 1 - // Restore RBP + // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] + mov r12, [rdx + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [rdx + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [rdx + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [rdx + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] + // Save the SP of this function. mov [rcx], rsp // Invoke the funclet call rsi - pop_nonvol_reg rbp + FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFunclet, _TEXT @@ -397,7 +440,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. // - push_nonvol_reg rbp + FUNCLET_CALL_PROLOGUE 0, 1 // Restore RBP mov rbp, rsi @@ -406,6 +449,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet call rdx - pop_nonvol_reg rbp + FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFilterFunclet, _TEXT