Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

Open
vasama opened this issue Dec 14, 2024 · 1 comment
Open

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

vasama opened this issue Dec 14, 2024 · 1 comment

Comments

@vasama
Copy link

vasama commented Dec 14, 2024

Reduced IR:

; Module header of the reduced reproducer: data layout string and target
; triple (amd64, Windows, MSVC environment) that the IR is compiled for.
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

; Named struct types that remain after reduction. Only %struct.anon is
; referenced directly by the function body (as the element type of the
; alloca); every other type below is reachable only as a nested member of it.
%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
; Pointer + 64-bit integer pair wrapped by %"class.vsm::atomic_intrusive_ptr".
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

; Reduced test function: performs a 128-bit cmpxchg on a 64-byte-aligned
; stack slot, then invokes a function whose unwind edge leads to a cleanup
; pad. Uses the fastcc calling convention, attribute group #0 and the
; __CxxFrameHandler3 personality routine.
define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  ; Stack allocation of a zero-sized array type with a count of 0, aligned to
  ; 64 bytes.
  ; NOTE(review): the 64-byte alignment is presumably what causes the stack
  ; realignment (and rsp, -0x40) seen in the disassembly in this report -
  ; confirm.
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  ; 128-bit compare-and-exchange on the alloca'd address: expected value 0,
  ; new value 0, monotonic ordering for both success and failure, 16-byte
  ; aligned. The report states this is emitted as lock cmpxchg16b.
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  ; Call that may unwind: normal return continues at %3, an exception
  ; transfers control to the cleanup pad at %4.
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
  ; Normal-return successor of the invoke; marked unreachable.
  unreachable

4:                                                ; preds = %0
  ; Unwind successor of the invoke: opens a top-level cleanup pad
  ; ("within none") with no arguments.
  %5 = cleanuppad within none []
  ret void
}

; Personality routine referenced by the test function's definition.
declare i32 @__CxxFrameHandler3(...)

; Callee of the invoke in the test function.
; NOTE(review): declared here with an empty parameter list although the
; invoke passes one i32 argument - presumably an artifact of the automatic
; reduction; confirm before reusing this IR elsewhere.
declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
; Records the order of the uses of the constant i32 0.
uselistorder i32 0, { 1, 0 }

; Attribute group #0, attached to the test function: selects "nehalem" as
; the target CPU.
attributes #0 = { "target-cpu"="nehalem" }

Here is the resulting object code:
(clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -)

0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55                            push    rbp
       1: 53                            push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
       6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3                      mov     rbx, rsp
      12: 48 89 6b 58                   mov     qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov     qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2                         xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0                            lock
      2e: 48 0f c7 4b 40                cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3                      mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc                            int3
      3e: 66 90                         nop

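Note the mov rbx, r9 followed by cmpxchg16b xmmword ptr [rbx + 0x40]: rbx is used as the base register of the memory operand immediately after it has been overwritten, because cmpxchg16b also takes rbx as an input register. The pointer that rbx originally held is saved in r8 (mov r8, rbx) and only restored after the instruction (mov rbx, r8), so the cmpxchg16b addresses memory relative to the wrong value.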

The original unreduced input produces slightly different object code but has the same problem:

00007FF786689459  lea         r8,[rbx+100h]  
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467  mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop  
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor         ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]
@llvmbot
Copy link
Member

llvmbot commented Dec 14, 2024

@llvm/issue-subscribers-backend-x86

Author: Lauri Vasama (vasama)

Reduced IR:
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
  unreachable

4:                                                ; preds = %0
  %5 = cleanuppad within none []
  ret void
}

declare i32 @__CxxFrameHandler3(...)

declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
uselistorder i32 0, { 1, 0 }

attributes #0 = { "target-cpu"="nehalem" }

Here is the resulting object code:
(clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -)

0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55                            push    rbp
       1: 53                            push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
       6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3                      mov     rbx, rsp
      12: 48 89 6b 58                   mov     qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov     qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2                         xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0                            lock
      2e: 48 0f c7 4b 40                cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3                      mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc                            int3
      3e: 66 90                         nop

Note mov rbx, r9 followed by cmpxchg16b xmmword ptr [rbx + 0x40] where rbx is used after having just been overwritten for the purposes of cmpxchg16b which uses it as an input register.

The original unreduced input produces slightly different object code but has the same problem:

00007FF786689459  lea         r8,[rbx+100h]  
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467  mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop  
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor         ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

3 participants