-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[llvm] cmpxchg16b uses pointer from overwritten rbx #119959
Comments
@llvm/issue-subscribers-backend-x86 Author: Lauri Vasama (vasama)
Reduced IR:
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"
%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }
define fastcc void @"?test_case@?A0x7E1854EA@@<!-- -->YAXXZ"() #<!-- -->0 personality ptr @<!-- -->__CxxFrameHandler3 {
%1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
%2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
invoke void @"?_Throw_Cpp_error@<!-- -->std@@<!-- -->YAXH@<!-- -->Z"(i32 0)
to label %3 unwind label %4
3: ; preds = %0
unreachable
4: ; preds = %0
%5 = cleanuppad within none []
ret void
}
declare i32 @<!-- -->__CxxFrameHandler3(...)
declare void @"?_Throw_Cpp_error@<!-- -->std@@<!-- -->YAXH@<!-- -->Z"()
; uselistorder directives
uselistorder i32 0, { 1, 0 }
attributes #<!-- -->0 = { "target-cpu"="nehalem" }
Here is the resulting object code:
0000000000000000 <?test_case@?A0x7E1854EA@@<!-- -->YAXXZ>:
0: 55 push rbp
1: 53 push rbx
2: 48 83 ec 68 sub rsp, 0x68
6: 48 8d 6c 24 60 lea rbp, [rsp + 0x60]
b: 48 83 e4 c0 and rsp, -0x40
f: 48 89 e3 mov rbx, rsp
12: 48 89 6b 58 mov qword ptr [rbx + 0x58], rbp
16: 48 c7 45 00 fe ff ff ff mov qword ptr [rbp], -0x2
1e: 49 89 d8 mov r8, rbx
21: 45 31 c9 xor r9d, r9d
24: 31 c0 xor eax, eax
26: 31 d2 xor edx, edx
28: 31 c9 xor ecx, ecx
2a: 4c 89 cb mov rbx, r9
2d: f0 lock
2e: 48 0f c7 4b 40 cmpxchg16b xmmword ptr [rbx + 0x40]
33: 4c 89 c3 mov rbx, r8
36: 31 c9 xor ecx, ecx
38: e8 00 00 00 00 call 0x3d <?test_case@?A0x7E1854EA@@<!-- -->YAXXZ+0x3d>
3d: cc int3
3e: 66 90 nop
Note
The original unreduced input produces slightly different object code but has the same problem:
00007FF786689459 lea r8,[rbx+100h]
00007FF786689460 mov rax,qword ptr [rbx+140h]
00007FF786689467 mov rdx,qword ptr [rbx+148h]
00007FF78668946E nop
00007FF786689470 mov r9,rbx
00007FF786689473 xor ecx,ecx
00007FF786689475 mov rbx,r8
00007FF786689478 lock cmpxchg16b oword ptr [rbx+140h]
Reduced IR:
Here is the resulting object code:
(
clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -
)
Note:
`mov rbx, r9` is followed by `cmpxchg16b xmmword ptr [rbx + 0x40]`, where `rbx` is used as the base of the memory operand after having just been overwritten for the purposes of `cmpxchg16b`, which uses it as an input register (the low half of the RCX:RBX replacement value).
The original unreduced input produces slightly different object code but has the same problem:
The text was updated successfully, but these errors were encountered: