Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Jan 22, 2026

Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).

@llvmbot
Copy link
Member

llvmbot commented Jan 22, 2026

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).


Full diff: https://github.com/llvm/llvm-project/pull/177420.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/R600ISelLowering.cpp (+3)
  • (modified) llvm/lib/Target/AMDGPU/R600ISelLowering.h (-2)
  • (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+164)
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 33a23ffb81926..5cd7a61d2c936 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1480,6 +1480,9 @@ SDValue R600TargetLowering::LowerFormalArguments(
       MemVT = MemVT.getVectorElementType();
     }
 
+    if (VT.isInteger() && !MemVT.isInteger())
+      MemVT = MemVT.changeTypeToInteger();
+
     if (AMDGPU::isShader(CallConv)) {
       Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index bb7fc46a98cbd..661efb8684813 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -117,8 +117,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
 
   TargetLowering::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;
-
-  bool softPromoteHalfType() const override { return false; }
 };
 
 } // End namespace llvm;
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index a2da8876472ab..0a53b3a906fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6187,3 +6187,167 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
   store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
   ret void
 }
+
+define amdgpu_kernel void @f16_arg(half %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: f16_arg:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s6
+; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: f16_arg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: f16_arg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: f16_arg:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     AND_INT T0.W, KC0[2].Z, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
+; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
+; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: f16_arg:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @6
+; CM-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT:    ALU clause starting at 8:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 9:
+; CM-NEXT:     AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
+; CM-NEXT:     LSHL T0.X, PV.Z, PV.W,
+; CM-NEXT:     LSHL * T0.W, literal.x, PV.W,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     MOV T0.Y, 0.0,
+; CM-NEXT:     MOV * T0.Z, 0.0,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store half %arg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @v2f16_arg(<2 x half> %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: v2f16_arg:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s6
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v2f16_arg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v2f16_arg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v2f16_arg:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @10, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @6
+; EG-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 38, #3
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     LSHL * T0.W, T1.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT T0.X, T0.X, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: v2f16_arg:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
+; CM-NEXT:    TEX 1 @6
+; CM-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 38, #3
+; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 11:
+; CM-NEXT:     LSHL * T0.W, T1.X, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     OR_INT * T0.X, T0.X, PV.W,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store <2 x half> %arg, ptr addrspace(1) %ptr
+  ret void
+}

@llvmbot
Copy link
Member

llvmbot commented Jan 22, 2026

@llvm/pr-subscribers-llvm-globalisel

Author: Matt Arsenault (arsenm)

Changes

Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).


Full diff: https://github.com/llvm/llvm-project/pull/177420.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/R600ISelLowering.cpp (+3)
  • (modified) llvm/lib/Target/AMDGPU/R600ISelLowering.h (-2)
  • (modified) llvm/test/CodeGen/AMDGPU/kernel-args.ll (+164)
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 33a23ffb81926..5cd7a61d2c936 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1480,6 +1480,9 @@ SDValue R600TargetLowering::LowerFormalArguments(
       MemVT = MemVT.getVectorElementType();
     }
 
+    if (VT.isInteger() && !MemVT.isInteger())
+      MemVT = MemVT.changeTypeToInteger();
+
     if (AMDGPU::isShader(CallConv)) {
       Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index bb7fc46a98cbd..661efb8684813 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -117,8 +117,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
 
   TargetLowering::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;
-
-  bool softPromoteHalfType() const override { return false; }
 };
 
 } // End namespace llvm;
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index a2da8876472ab..0a53b3a906fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6187,3 +6187,167 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
   store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
   ret void
 }
+
+define amdgpu_kernel void @f16_arg(half %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: f16_arg:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s6
+; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: f16_arg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    flat_store_short v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: f16_arg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-NEXT:    global_store_short v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: f16_arg:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT:    ALU clause starting at 8:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     AND_INT T0.W, KC0[2].Z, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T0.X, literal.y,
+; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
+; EG-NEXT:     LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT:     LSHL T0.X, T1.W, PV.W,
+; EG-NEXT:     LSHL * T0.W, literal.x, PV.W,
+; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT:     MOV T0.Y, 0.0,
+; EG-NEXT:     MOV * T0.Z, 0.0,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: f16_arg:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 0, @8, KC0[], KC1[]
+; CM-NEXT:    TEX 0 @6
+; CM-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT:    ALU clause starting at 8:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 9:
+; CM-NEXT:     AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
+; CM-NEXT:     AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT:     LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
+; CM-NEXT:     LSHL T0.X, PV.Z, PV.W,
+; CM-NEXT:     LSHL * T0.W, literal.x, PV.W,
+; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT:     MOV T0.Y, 0.0,
+; CM-NEXT:     MOV * T0.Z, 0.0,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store half %arg, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_kernel void @v2f16_arg(<2 x half> %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: v2f16_arg:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x9
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    s_waitcnt lgkmcnt(0)
+; SI-NEXT:    v_mov_b32_e32 v0, s6
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v2f16_arg:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT:    s_load_dword s2, s[4:5], 0x24
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    s_endpgm
+;
+; GFX9-LABEL: v2f16_arg:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; EG-LABEL: v2f16_arg:
+; EG:       ; %bb.0:
+; EG-NEXT:    ALU 0, @10, KC0[], KC1[]
+; EG-NEXT:    TEX 1 @6
+; EG-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT:    CF_END
+; EG-NEXT:    PAD
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_16 T1.X, T0.X, 38, #3
+; EG-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     MOV * T0.X, 0.0,
+; EG-NEXT:    ALU clause starting at 11:
+; EG-NEXT:     LSHL * T0.W, T1.X, literal.x,
+; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT:     OR_INT T0.X, T0.X, PV.W,
+; EG-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: v2f16_arg:
+; CM:       ; %bb.0:
+; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
+; CM-NEXT:    TEX 1 @6
+; CM-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
+; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT:    CF_END
+; CM-NEXT:    PAD
+; CM-NEXT:    Fetch clause starting at 6:
+; CM-NEXT:     VTX_READ_16 T1.X, T0.X, 38, #3
+; CM-NEXT:     VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT:    ALU clause starting at 10:
+; CM-NEXT:     MOV * T0.X, 0.0,
+; CM-NEXT:    ALU clause starting at 11:
+; CM-NEXT:     LSHL * T0.W, T1.X, literal.x,
+; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT:     OR_INT * T0.X, T0.X, PV.W,
+; CM-NEXT:     LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+  store <2 x half> %arg, ptr addrspace(1) %ptr
+  ret void
+}

@shiltian
Copy link
Contributor

Out of scope question: do we have plans to phase out R600 support in the compiler?

@arsenm
Copy link
Contributor Author

arsenm commented Jan 22, 2026

Not really

@arsenm arsenm force-pushed the users/arsenm/amdgpu/softPromoteHalfType-r600-only branch from a2319dc to 8c1dd3a Compare January 23, 2026 08:06
@arsenm arsenm force-pushed the users/arsenm/r600/remove-softPromoteHalfType branch 2 times, most recently from 074f0b4 to febe138 Compare January 23, 2026 11:19
@arsenm arsenm force-pushed the users/arsenm/amdgpu/softPromoteHalfType-r600-only branch from 8c1dd3a to 2fa99dc Compare January 23, 2026 11:19
@github-actions
Copy link

🪟 Windows x64 Test Results

  • 126682 tests passed
  • 2468 tests skipped

All executed tests passed, but another part of the build failed. Click on a failure below to see the details.

[code=4294967295] bin/mlir-opt.exe
FAILED: [code=4294967295] bin/mlir-opt.exe
cmd.exe /C "cd . && C:\BuildTools\Common7\IDE\CommonExtensions\Microsoft\CMake\CMake\bin\cmake.exe -E vs_link_exe --intdir=tools\mlir\tools\mlir-opt\CMakeFiles\mlir-opt.dir --rc="C:\Program Files (x86)\Windows Kits\10\bin\10.0.19041.0\x64\rc.exe" --mt="C:\Program Files (x86)\Windows Kits\10\bin\10.0.19041.0\x64\mt.exe" --manifests  -- C:\clang\clang-msvc\bin\lld-link.exe /nologo @CMakeFiles\mlir-opt.rsp  /out:bin\mlir-opt.exe /implib:lib\mlir-opt.lib /pdb:bin\mlir-opt.pdb /version:0.0 /MANIFEST:NO /STACK:10000000 /INCREMENTAL:NO /subsystem:console  && cd ."
LINK: command "C:\clang\clang-msvc\bin\lld-link.exe /nologo @CMakeFiles\mlir-opt.rsp /out:bin\mlir-opt.exe /implib:lib\mlir-opt.lib /pdb:bin\mlir-opt.pdb /version:0.0 /MANIFEST:NO /STACK:10000000 /INCREMENTAL:NO /subsystem:console" failed (exit code 1) with the following output:
lld-link: error: undefined symbol: void __cdecl mlir::vector::registerTransformDialectExtension(class mlir::DialectRegistry &)
>>> referenced by MLIRRegisterAllExtensions.lib(RegisterAllExtensions.cpp.obj):(void __cdecl mlir::registerAllExtensions(class mlir::DialectRegistry &))

If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the infrastructure label.

As expected the code is much worse, but more correct.
We could do a better job with source modifier management around
fp16_to_fp/fp_to_fp16.
Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).
@arsenm arsenm force-pushed the users/arsenm/amdgpu/softPromoteHalfType-r600-only branch from 2fa99dc to 34cfc39 Compare January 26, 2026 14:51
@arsenm arsenm force-pushed the users/arsenm/r600/remove-softPromoteHalfType branch from febe138 to c91ea57 Compare January 26, 2026 14:51
Base automatically changed from users/arsenm/amdgpu/softPromoteHalfType-r600-only to main January 26, 2026 15:23
@arsenm arsenm merged commit 99fab01 into main Jan 26, 2026
14 of 15 checks passed
@arsenm arsenm deleted the users/arsenm/r600/remove-softPromoteHalfType branch January 26, 2026 15:27
boomanaiden154 added a commit that referenced this pull request Jan 26, 2026
This reverts commit 99fab01.

llc was crashing in kernel-args.ll after this patch:
```
.---command stderr------------
| LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32
| In function: f16_arg
| PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
| Stack dump:
| 0.	Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood
| 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
| 2.	Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg'
|  #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438)
|  #1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75)
|  #2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
|  #3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050)
|  #4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec)
|  #5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2)
|  #6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472)
|  #7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005)
|  #8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7)
|  #9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d)
| #10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94)
| #11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a)
| #12 0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e)
| #13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30)
| #14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0)
| #15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793)
| #16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5)
| #17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72)
| #18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56)
| #19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0
| #20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3)
| #21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a)
| #22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305)
| #23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21)
`-----------------------------
```
@boomanaiden154
Copy link
Contributor

I've reverted this in 38b7176. llc was crashing in one of the AMDGPU codegen tests afterwards. Sorry the any inconvenience.

@arsenm
Copy link
Contributor Author

arsenm commented Jan 26, 2026

I've reverted this in 38b7176. llc was crashing in one of the AMDGPU codegen tests afterwards. Sorry the any inconvenience.

Why did it pass the pre tests?

arsenm added a commit that referenced this pull request Jan 26, 2026
arsenm added a commit that referenced this pull request Jan 26, 2026
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;

bool softPromoteHalfType() const override { return false; }
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somehow the actual change got lost here

arsenm added a commit that referenced this pull request Jan 26, 2026
This was the core change in #177420, which somehow got lost
before submitting.
arsenm added a commit that referenced this pull request Jan 26, 2026
This was the core change in #177420, which somehow got lost
before submitting.
stomfaig pushed a commit to stomfaig/llvm-project that referenced this pull request Jan 28, 2026
stomfaig pushed a commit to stomfaig/llvm-project that referenced this pull request Jan 28, 2026
This was the core change in llvm#177420, which somehow got lost
before submitting.
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Jan 29, 2026
Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Jan 29, 2026
This reverts commit 99fab01.

llc was crashing in kernel-args.ll after this patch:
```
.---command stderr------------
| LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32
| In function: f16_arg
| PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
| Stack dump:
| 0.	Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood
| 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
| 2.	Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg'
|  #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438)
|  llvm#1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75)
|  llvm#2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
|  llvm#3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050)
|  llvm#4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec)
|  llvm#5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2)
|  llvm#6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472)
|  llvm#7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005)
|  llvm#8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7)
|  llvm#9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d)
| llvm#10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94)
| llvm#11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a)
| llvm#12 0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e)
| llvm#13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30)
| llvm#14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0)
| llvm#15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793)
| llvm#16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5)
| llvm#17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72)
| llvm#18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56)
| llvm#19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0
| llvm#20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3)
| llvm#21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a)
| llvm#22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305)
| llvm#23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21)
`-----------------------------
```
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Jan 29, 2026
sshrestha-aa pushed a commit to sshrestha-aa/llvm-project that referenced this pull request Feb 4, 2026
Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).
sshrestha-aa pushed a commit to sshrestha-aa/llvm-project that referenced this pull request Feb 4, 2026
This reverts commit 99fab01.

llc was crashing in kernel-args.ll after this patch:
```
.---command stderr------------
| LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32
| In function: f16_arg
| PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
| Stack dump:
| 0.	Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood
| 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
| 2.	Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg'
|  #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438)
|  llvm#1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75)
|  llvm#2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
|  llvm#3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050)
|  llvm#4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec)
|  llvm#5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2)
|  llvm#6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472)
|  llvm#7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005)
|  llvm#8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7)
|  llvm#9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d)
| llvm#10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94)
| llvm#11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a)
| llvm#12 0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e)
| llvm#13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30)
| llvm#14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0)
| llvm#15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793)
| llvm#16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5)
| llvm#17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72)
| llvm#18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56)
| llvm#19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0
| llvm#20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3)
| llvm#21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a)
| llvm#22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305)
| llvm#23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21)
`-----------------------------
```
sshrestha-aa pushed a commit to sshrestha-aa/llvm-project that referenced this pull request Feb 4, 2026
sshrestha-aa pushed a commit to sshrestha-aa/llvm-project that referenced this pull request Feb 4, 2026
This was the core change in llvm#177420, which somehow got lost
before submitting.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants