From 06d10b8d1a0f4bbd592325e1a0f284fd61100b91 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Sun, 7 Jan 2024 13:42:59 -0500
Subject: [PATCH 1/4] [CodeGen][X86] Fix lowering of tailcalls when `-ms-hotpatch` is used

Previously, tail jump pseudo-opcodes were skipped by the
`encodeInstruction()` call inside `X86AsmPrinter::LowerPATCHABLE_OP`.
This caused emission of a 2-byte NOP and dropping of the tail jump.

Also, the `PatchableFunction` pass didn't properly update the call site
info for the existing MachineInstr, after wrapping it into a
`TargetOpcode::PATCHABLE_OP`.
---
 llvm/lib/CodeGen/PatchableFunction.cpp |  3 +
 llvm/lib/Target/X86/X86MCInstLower.cpp |  8 ++-
 .../X86/patchable-prologue-tailcall.ll | 69 +++++++++++++++++++
 3 files changed, 78 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll

diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index 9449f143366f0f..768e8dff2efc24 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -87,6 +87,9 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
   for (auto &MO : FirstActualI->operands())
     MIB.add(MO);
 
+  if (FirstActualI->shouldUpdateCallSiteInfo())
+    MF.eraseCallSiteInfo(&*FirstActualI);
+
   FirstActualI->eraseFromParent();
   MF.ensureAlignment(Align(16));
   return true;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e1a67f61e76640..93aa0fc23eb9f2 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -959,7 +959,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
   bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
 
   MCInst MCI;
-  MCI.setOpcode(Opcode);
+  // Make sure below we don't encode pseudo tailcalls.
+  MCI.setOpcode(convertTailJumpOpcode(Opcode));
   for (auto &MO : drop_begin(MI.operands(), 2))
     if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
       MCI.addOperand(*MaybeOperand);
@@ -994,8 +995,11 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
       (void)NopSize;
     }
   }
-  if (!EmptyInst)
+  if (!EmptyInst) {
+    if (Opcode != convertTailJumpOpcode(Opcode))
+      OutStreamer->AddComment("TAILCALL");
     OutStreamer->emitInstruction(MCI, getSubtargetInfo());
+  }
 }
 
 // Lower a stackmap of the form:
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
new file mode 100644
index 00000000000000..ab3ff5852a74b0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -0,0 +1,69 @@
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+
+; CHECK: "?mi_new_test@@YAPEAX_K@Z":
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: jmp mi_new
+
+; CHECK: "?builtin_malloc_test@@YAPEAX_K@Z":
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: jmp malloc
+
+; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
+;
+; typedef unsigned long long size_t;
+;
+; extern "C" {
+;   void* mi_new(size_t size);
+; }
+;
+; void *mi_new_test(size_t count)
+; {
+;   return mi_new(count);
+; }
+;
+; void *builtin_malloc_test(size_t count)
+; {
+;   return __builtin_malloc(count);
+; }
+
+; ModuleID = 'patchable-prologue-tailcall.cpp'
+source_filename = "patchable-prologue-tailcall.cpp"
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.38.33133"
+
+; Function Attrs: mustprogress nounwind sspstrong uwtable
+define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr #0 {
+entry:
+  %call = tail call ptr @mi_new(i64 noundef %count) #4
+  ret ptr %call
+}
+
+declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable
+define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr #2 {
+entry:
+  %call = tail call ptr @malloc(i64 noundef %count) #4
+  ret ptr %call
+}
+
+; Function Attrs: mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite)
+declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #3
+
+attributes #0 = { mustprogress nounwind sspstrong uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind }
+
+!llvm.linker.options = !{!0, !1}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = !{!"/DEFAULTLIB:libcmt.lib"}
+!1 = !{!"/DEFAULTLIB:oldnames.lib"}
+!2 = !{i32 1, !"wchar_size", i32 2}
+!3 = !{i32 8, !"PIC Level", i32 2}
+!4 = !{i32 7, !"uwtable", i32 2}
+!5 = !{i32 1, !"MaxTLSAlign", i32 65536}
+!6 = !{!"clang version 18.0.0git (https://github.com/llvm/llvm-project.git 0ccf9e29e9626a3a6813b35608c8959178c50a6f)"}

From 7ef9c22d2f5c62070be981675b74d9d379aa7ce2 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Wed, 10 Jan 2024 08:42:45 -0500
Subject: [PATCH 2/4] Change how `"patchable-function"="prologue-short-redirect"` works.

Instead of `PATCHABLE_OP` wrapping the first `MachineInstr`, it now
inserts itself before it, leaving the instruction unaltered. At lowering
time in `X86AsmPrinter`, we now "look ahead" for the next non-pseudo
`MachineInstr` and lower+encode it, to inspect its size. If the size is
below what `PATCHABLE_OP` expects, it inserts NOPs; otherwise it does
nothing.
---
 llvm/include/llvm/Support/TargetOpcodes.def  | 11 +---
 llvm/lib/CodeGen/PatchableFunction.cpp       | 55 ++++---------------
 llvm/lib/Target/X86/X86MCInstLower.cpp       | 41 +++++---------
 .../X86/patchable-prologue-tailcall.ll       | 42 +++++---------
 llvm/test/CodeGen/X86/patchable-prologue.ll  |  4 +-
 5 files changed, 46 insertions(+), 107 deletions(-)

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3824b1c66951e8..669cac2a9f7e65 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -170,15 +170,10 @@ HANDLE_TARGET_OPCODE(LOCAL_ESCAPE)
 /// comparisons into existing memory operations.
 HANDLE_TARGET_OPCODE(FAULTING_OP)
 
-/// Wraps a machine instruction to add patchability constraints. An
-/// instruction wrapped in PATCHABLE_OP has to either have a minimum
+/// Precedes a machine instruction to add patchability constraints. An
+/// instruction after PATCHABLE_OP has to either have a minimum
 /// size or be preceded with a nop of that size. The first operand is
-/// an immediate denoting the minimum size of the instruction, the
-/// second operand is an immediate denoting the opcode of the original
-/// instruction. The rest of the operands are the operands of the
-/// original instruction.
-/// PATCHABLE_OP can be used as second operand to only insert a nop of
-/// required size.
+/// an immediate denoting the minimum size of the instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_OP)
 
 /// This is a marker instruction which gets translated into a nop sled, useful
diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp
index 768e8dff2efc24..75c2dfeca58d5a 100644
--- a/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -38,61 +38,28 @@ struct PatchableFunction : public MachineFunctionPass {
 }
 
 bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
+  MachineBasicBlock &FirstMBB = *MF.begin();
+
   if (MF.getFunction().hasFnAttribute("patchable-function-entry")) {
-    MachineBasicBlock &FirstMBB = *MF.begin();
     const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
     // The initial .loc covers PATCHABLE_FUNCTION_ENTER.
     BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
             TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
     return true;
-  }
-
-  if (!MF.getFunction().hasFnAttribute("patchable-function"))
-    return false;
-
+  } else if (MF.getFunction().hasFnAttribute("patchable-function")) {
 #ifndef NDEBUG
-  Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
-  StringRef PatchType = PatchAttr.getValueAsString();
-  assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
+    Attribute PatchAttr = MF.getFunction().getFnAttribute("patchable-function");
+    StringRef PatchType = PatchAttr.getValueAsString();
+    assert(PatchType == "prologue-short-redirect" && "Only possibility today!");
 #endif
-
-  auto &FirstMBB = *MF.begin();
-  auto *TII = MF.getSubtarget().getInstrInfo();
-
-  MachineBasicBlock::iterator FirstActualI = llvm::find_if(
-      FirstMBB, [](const MachineInstr &MI) { return !MI.isMetaInstruction(); });
-
-  if (FirstActualI == FirstMBB.end()) {
-    // As of Microsoft documentation on /hotpatch feature, we must ensure that
-    // "the first instruction of each function is at least two bytes, and no
-    // jump within the function goes to the first instruction"
-
-    // When the first MBB is empty, insert a patchable no-op. This ensures the
-    // first instruction is patchable in two special cases:
-    // - the function is empty (e.g. unreachable)
-    // - the function jumps back to the first instruction, which is in a
-    //   successor MBB.
-    BuildMI(&FirstMBB, DebugLoc(), TII->get(TargetOpcode::PATCHABLE_OP))
-        .addImm(2)
-        .addImm(TargetOpcode::PATCHABLE_OP);
+    auto *TII = MF.getSubtarget().getInstrInfo();
+    BuildMI(FirstMBB, FirstMBB.begin(), DebugLoc(),
+            TII->get(TargetOpcode::PATCHABLE_OP))
+        .addImm(2);
     MF.ensureAlignment(Align(16));
     return true;
   }
-
-  auto MIB = BuildMI(FirstMBB, FirstActualI, FirstActualI->getDebugLoc(),
-                     TII->get(TargetOpcode::PATCHABLE_OP))
-                 .addImm(2)
-                 .addImm(FirstActualI->getOpcode());
-
-  for (auto &MO : FirstActualI->operands())
-    MIB.add(MO);
-
-  if (FirstActualI->shouldUpdateCallSiteInfo())
-    MF.eraseCallSiteInfo(&*FirstActualI);
-
-  FirstActualI->eraseFromParent();
-  MF.ensureAlignment(Align(16));
-  return true;
+  return false;
 }
 
 char PatchableFunction::ID = 0;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 93aa0fc23eb9f2..1122d31593c4ef 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -948,25 +948,26 @@ void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
 
 void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
                                       X86MCInstLower &MCIL) {
-  // PATCHABLE_OP minsize, opcode, operands
+  // PATCHABLE_OP minsize
 
   NoAutoPaddingScope NoPadScope(*OutStreamer);
 
-  unsigned MinSize = MI.getOperand(0).getImm();
-  unsigned Opcode = MI.getOperand(1).getImm();
-  // Opcode PATCHABLE_OP is a special case: there is no instruction to wrap,
-  // simply emit a nop of size MinSize.
-  bool EmptyInst = (Opcode == TargetOpcode::PATCHABLE_OP);
-
-  MCInst MCI;
-  // Make sure below we don't encode pseudo tailcalls.
-  MCI.setOpcode(convertTailJumpOpcode(Opcode));
-  for (auto &MO : drop_begin(MI.operands(), 2))
-    if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
-      MCI.addOperand(*MaybeOperand);
+  // Find the next MachineInstr in this MBB.
+  const MachineInstr *NextMI = MI.getNextNode();
+  while (NextMI) {
+    if (!NextMI->isMetaInstruction())
+      break;
+    NextMI = NextMI->getNextNode();
+  }
 
   SmallString<256> Code;
-  if (!EmptyInst) {
+  unsigned MinSize = MI.getOperand(0).getImm();
+
+  if (NextMI) {
+    // Lower the next MachineInstr to find its byte size.
+    MCInst MCI;
+    MCIL.Lower(NextMI, MCI);
+
     SmallVector<MCFixup> Fixups;
     CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
   }
@@ -982,24 +983,12 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
       OutStreamer->emitInstruction(
           MCInstBuilder(X86::MOV32rr_REV).addReg(X86::EDI).addReg(X86::EDI),
           *Subtarget);
-    } else if (MinSize == 2 && Opcode == X86::PUSH64r) {
-      // This is an optimization that lets us get away without emitting a nop in
-      // many cases.
-      //
-      // NB! In some cases the encoding for PUSH64r (e.g. PUSH64r %r9) takes two
-      // bytes too, so the check on MinSize is important.
-      MCI.setOpcode(X86::PUSH64rmr);
     } else {
       unsigned NopSize = emitNop(*OutStreamer, MinSize, Subtarget);
       assert(NopSize == MinSize && "Could not implement MinSize!");
       (void)NopSize;
     }
   }
-  if (!EmptyInst) {
-    if (Opcode != convertTailJumpOpcode(Opcode))
-      OutStreamer->AddComment("TAILCALL");
-    OutStreamer->emitInstruction(MCI, getSubtargetInfo());
-  }
 }
 
 // Lower a stackmap of the form:
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
index ab3ff5852a74b0..38a1ce92ff5588 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -2,11 +2,11 @@
 
 ; CHECK: "?mi_new_test@@YAPEAX_K@Z":
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp mi_new
+; CHECK-NEXT: jmp mi_new # TAILCALL
 
 ; CHECK: "?builtin_malloc_test@@YAPEAX_K@Z":
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp malloc
+; CHECK-NEXT: jmp malloc # TAILCALL
 
 ; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
 ;
@@ -26,44 +26,30 @@
 ;   return __builtin_malloc(count);
 ; }
 
-; ModuleID = 'patchable-prologue-tailcall.cpp'
-source_filename = "patchable-prologue-tailcall.cpp"
 target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc-windows-msvc19.38.33133"
 
-; Function Attrs: mustprogress nounwind sspstrong uwtable
-define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr #0 {
+define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @mi_new(i64 noundef %count) #4
+  %call = tail call ptr @mi_new(i64 noundef %count)
   ret ptr %call
 }
 
-declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr #1
+declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr
 
-; Function Attrs: mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable
-define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr #2 {
+define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @malloc(i64 noundef %count) #4
+  %call = tail call ptr @malloc(i64 noundef %count)
   ret ptr %call
 }
 
-; Function Attrs: mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite)
-declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #3
+declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #0
 
-attributes #0 = { mustprogress nounwind sspstrong uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #2 = { mustprogress nofree nounwind sspstrong willreturn memory(inaccessiblemem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "patchable-function"="prologue-short-redirect" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #3 = { mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
-attributes #4 = { nounwind }
+attributes #0 = { allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
 
-!llvm.linker.options = !{!0, !1}
-!llvm.module.flags = !{!2, !3, !4, !5}
-!llvm.ident = !{!6}
+!llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = !{!"/DEFAULTLIB:libcmt.lib"}
-!1 = !{!"/DEFAULTLIB:oldnames.lib"}
-!2 = !{i32 1, !"wchar_size", i32 2}
-!3 = !{i32 8, !"PIC Level", i32 2}
-!4 = !{i32 7, !"uwtable", i32 2}
-!5 = !{i32 1, !"MaxTLSAlign", i32 65536}
-!6 = !{!"clang version 18.0.0git (https://github.com/llvm/llvm-project.git 0ccf9e29e9626a3a6813b35608c8959178c50a6f)"}
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"uwtable", i32 2}
+!3 = !{i32 1, !"MaxTLSAlign", i32 65536}
diff --git a/llvm/test/CodeGen/X86/patchable-prologue.ll b/llvm/test/CodeGen/X86/patchable-prologue.ll
index f1a39777a40bcc..71a392845fdea3 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue.ll
@@ -32,7 +32,8 @@ define void @f0() "patchable-function"="prologue-short-redirect" {
 
 define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"="all" {
 ; CHECK-LABEL: _f1
-; CHECK-NEXT: ff f5 pushq %rbp
+; CHECK-NEXT: 66 90 nop
+; CHECK-NEXT: 55 pushq %rbp
 
 ; CHECK-ALIGN: .p2align 4, 0x90
 ; CHECK-ALIGN: _f1:
@@ -47,6 +48,7 @@ define void @f1() "patchable-function"="prologue-short-redirect" "frame-pointer"
 ; 64: f1:
 ; 64-NEXT: .seh_proc f1
 ; 64-NEXT: # %bb.0:
+; 64-NEXT: xchgw %ax, %ax
 ; 64-NEXT: pushq %rbp
 
   ret void

From f83ecee291542bdc4008b26fc323f0ebe32a6742 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Wed, 10 Jan 2024 08:47:33 -0500
Subject: [PATCH 3/4] Documentation.

---
 llvm/include/llvm/Support/TargetOpcodes.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 669cac2a9f7e65..a3c38a56561c1a 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -173,7 +173,7 @@ HANDLE_TARGET_OPCODE(FAULTING_OP)
 /// Precedes a machine instruction to add patchability constraints. An
 /// instruction after PATCHABLE_OP has to either have a minimum
 /// size or be preceded with a nop of that size. The first operand is
-/// an immediate denoting the minimum size of the instruction.
+/// an immediate denoting the minimum size of the following instruction.
 HANDLE_TARGET_OPCODE(PATCHABLE_OP)
 
 /// This is a marker instruction which gets translated into a nop sled, useful

From 342c4d4bd0ab6eb32dd1e2fe39eb10eefbc0a474 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea
Date: Mon, 22 Jan 2024 10:33:08 -0500
Subject: [PATCH 4/4] Simplify test and change code as suggested

---
 llvm/lib/Target/X86/X86MCInstLower.cpp | 14 +++----
 .../X86/patchable-prologue-tailcall.ll | 41 +++++--------------
 2 files changed, 15 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 1122d31593c4ef..e4d1d13cc31597 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -952,21 +952,17 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
 
   NoAutoPaddingScope NoPadScope(*OutStreamer);
 
-  // Find the next MachineInstr in this MBB.
-  const MachineInstr *NextMI = MI.getNextNode();
-  while (NextMI) {
-    if (!NextMI->isMetaInstruction())
-      break;
-    NextMI = NextMI->getNextNode();
-  }
+  auto NextMI = std::find_if(std::next(MI.getIterator()),
+                             MI.getParent()->end().getInstrIterator(),
+                             [](auto &II) { return !II.isMetaInstruction(); });
 
   SmallString<256> Code;
   unsigned MinSize = MI.getOperand(0).getImm();
 
-  if (NextMI) {
+  if (NextMI != MI.getParent()->end()) {
     // Lower the next MachineInstr to find its byte size.
     MCInst MCI;
-    MCIL.Lower(NextMI, MCI);
+    MCIL.Lower(&*NextMI, MCI);
 
     SmallVector<MCFixup> Fixups;
     CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
diff --git a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
index 38a1ce92ff5588..5e55524fecc9a1 100644
--- a/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
+++ b/llvm/test/CodeGen/X86/patchable-prologue-tailcall.ll
@@ -1,49 +1,28 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-windows-msvc < %s | FileCheck %s --check-prefix=CHECK
 
-; CHECK: "?mi_new_test@@YAPEAX_K@Z":
+; CHECK: f1:
 ; CHECK-NEXT: # %bb.0:
-; CHECK-NEXT: jmp mi_new # TAILCALL
+; CHECK-NEXT: jmp f0 # TAILCALL
 
-; CHECK: "?builtin_malloc_test@@YAPEAX_K@Z":
+; CHECK: f2:
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: jmp malloc # TAILCALL
 
-; // Built with: clang-cl.exe /c patchable-prologue-tailcall.cpp /O2 /hotpatch -Xclang -emit-llvm
-;
-; typedef unsigned long long size_t;
-;
-; extern "C" {
-;   void* mi_new(size_t size);
-; }
-;
-; void *mi_new_test(size_t count)
-; {
-;   return mi_new(count);
-; }
-;
-; void *builtin_malloc_test(size_t count)
-; {
-;   return __builtin_malloc(count);
-; }
-
-target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-windows-msvc19.38.33133"
-
-define dso_local noundef ptr @"?mi_new_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
+define ptr @f1(i64 %count) "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @mi_new(i64 noundef %count)
+  %call = tail call ptr @f0(i64 %count)
   ret ptr %call
 }
 
-declare dso_local ptr @mi_new(i64 noundef) local_unnamed_addr
+declare ptr @f0(i64)
 
-define dso_local noalias noundef ptr @"?builtin_malloc_test@@YAPEAX_K@Z"(i64 noundef %count) local_unnamed_addr "patchable-function"="prologue-short-redirect" {
+define noalias ptr @f2(i64 %count) "patchable-function"="prologue-short-redirect" {
 entry:
-  %call = tail call ptr @malloc(i64 noundef %count)
+  %call = tail call ptr @malloc(i64 %count)
   ret ptr %call
 }
 
-declare dso_local noalias noundef ptr @malloc(i64 noundef) local_unnamed_addr #0
+declare noalias ptr @malloc(i64) #0
 
 attributes #0 = { allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc" }
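
As a quick local check (not part of the patch series itself), the RUN line of the
simplified test above can be replayed by hand; this assumes an `llc` and `FileCheck`
built from this branch are on PATH and that the test file sits in the current
directory:

  llc -verify-machineinstrs -mtriple=x86_64-windows-msvc \
      patchable-prologue-tailcall.ll -o - \
    | FileCheck patchable-prologue-tailcall.ll --check-prefix=CHECK

With the series applied, the prologues of `f1` and `f2` keep their tail jumps
(`jmp f0 # TAILCALL`, `jmp malloc # TAILCALL`) instead of having them dropped in
favor of a 2-byte NOP, while still meeting the /hotpatch requirement that the first
instruction of each function be at least two bytes.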