[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

vasama · 2024-12-14T09:19:45Z

Reduced IR:

target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

; Reduced reproducer: a 128-bit cmpxchg on a 64-byte-aligned stack slot, in a
; function that uses the __CxxFrameHandler3 personality and contains an invoke
; with a cleanup pad.
; NOTE(review): presumably the over-aligned alloca is what makes the backend
; realign the stack and address locals through rbx (see `and rsp, -0x40` /
; `mov rbx, rsp` in the listing below) - confirm against the X86 backend.
define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  ; Zero-element alloca that still requests 64-byte alignment.
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  ; i128 compare-exchange on the alloca; expected and new values are both 0,
  ; monotonic ordering on success and failure.
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  ; Call that may unwind: normal continuation is %3, unwind destination is %4.
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
  unreachable

4:                                                ; preds = %0
  ; Cleanup funclet entry for the unwind edge of the invoke above.
  %5 = cleanuppad within none []
  ret void
}

declare i32 @__CxxFrameHandler3(...)

declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
uselistorder i32 0, { 1, 0 }

attributes #0 = { "target-cpu"="nehalem" }

Here is the resulting object code:
(clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -)

0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55                            push    rbp
       1: 53                            push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
       6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3                      mov     rbx, rsp
      12: 48 89 6b 58                   mov     qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov     qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2                         xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0                            lock
      2e: 48 0f c7 4b 40                cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3                      mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc                            int3
      3e: 66 90                         nop

Note the mov rbx, r9 followed by cmpxchg16b xmmword ptr [rbx + 0x40]: rbx is used as the base of the memory operand right after it has been overwritten to set up cmpxchg16b, which also takes rbx as an input register.

The original unreduced input produces slightly different object code but has the same problem:

00007FF786689459  lea         r8,[rbx+100h]  
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467  mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop  
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor         ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]

The text was updated successfully, but these errors were encountered:

llvmbot · 2024-12-14T12:05:55Z

@llvm/issue-subscribers-backend-x86

Author: Lauri Vasama (vasama)

Reduced IR:

target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
  unreachable

4:                                                ; preds = %0
  %5 = cleanuppad within none []
  ret void
}

declare i32 @__CxxFrameHandler3(...)

declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
uselistorder i32 0, { 1, 0 }

attributes #0 = { "target-cpu"="nehalem" }

Here is the resulting object code:
(clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -)

0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55                            push    rbp
       1: 53                            push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
       6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3                      mov     rbx, rsp
      12: 48 89 6b 58                   mov     qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov     qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2                         xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0                            lock
      2e: 48 0f c7 4b 40                cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3                      mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc                            int3
      3e: 66 90                         nop

Note the mov rbx, r9 followed by cmpxchg16b xmmword ptr [rbx + 0x40]: rbx is used as the base of the memory operand right after it has been overwritten to set up cmpxchg16b, which also takes rbx as an input register.

The original unreduced input produces slightly different object code but has the same problem:

00007FF786689459  lea         r8,[rbx+100h]  
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467  mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop  
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor         ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]

github-actions bot added the new issue label Dec 14, 2024

RKSimon added backend:X86 miscompilation and removed new issue labels Dec 14, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

vasama commented Dec 14, 2024

llvmbot commented Dec 14, 2024

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

[llvm] cmpxchg16b uses pointer from overwritten rbx #119959

Comments

vasama commented Dec 14, 2024

llvmbot commented Dec 14, 2024