-
Notifications
You must be signed in to change notification settings - Fork 16.1k
R600: Remove softPromoteHalfType #177420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
R600: Remove softPromoteHalfType #177420
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: Also includes a kind of hacky, minimal change to avoid assertions. Full diff: https://github.com/llvm/llvm-project/pull/177420.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 33a23ffb81926..5cd7a61d2c936 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1480,6 +1480,9 @@ SDValue R600TargetLowering::LowerFormalArguments(
MemVT = MemVT.getVectorElementType();
}
+ if (VT.isInteger() && !MemVT.isInteger())
+ MemVT = MemVT.changeTypeToInteger();
+
if (AMDGPU::isShader(CallConv)) {
Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index bb7fc46a98cbd..661efb8684813 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -117,8 +117,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;
-
- bool softPromoteHalfType() const override { return false; }
};
} // End namespace llvm;
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index a2da8876472ab..0a53b3a906fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6187,3 +6187,167 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
ret void
}
+
+define amdgpu_kernel void @f16_arg(half %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: f16_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s6, s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: f16_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT: s_load_dword s2, s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_short v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: f16_arg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; EG-LABEL: f16_arg:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
+; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: LSHL T0.X, T1.W, PV.W,
+; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: f16_arg:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
+; CM-NEXT: LSHL T0.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T0.W, literal.x, PV.W,
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: MOV T0.Y, 0.0,
+; CM-NEXT: MOV * T0.Z, 0.0,
+; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store half %arg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @v2f16_arg(<2 x half> %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: v2f16_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s6, s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: v2f16_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT: s_load_dword s2, s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v2f16_arg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; EG-LABEL: v2f16_arg:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT T0.X, T0.X, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: v2f16_arg:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @10, KC0[], KC1[]
+; CM-NEXT: TEX 1 @6
+; CM-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 11:
+; CM-NEXT: LSHL * T0.W, T1.X, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T0.X, T0.X, PV.W,
+; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store <2 x half> %arg, ptr addrspace(1) %ptr
+ ret void
+}
|
|
@llvm/pr-subscribers-llvm-globalisel Author: Matt Arsenault (arsenm) Changes: Also includes a kind of hacky, minimal change to avoid assertions. Full diff: https://github.com/llvm/llvm-project/pull/177420.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 33a23ffb81926..5cd7a61d2c936 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1480,6 +1480,9 @@ SDValue R600TargetLowering::LowerFormalArguments(
MemVT = MemVT.getVectorElementType();
}
+ if (VT.isInteger() && !MemVT.isInteger())
+ MemVT = MemVT.changeTypeToInteger();
+
if (AMDGPU::isShader(CallConv)) {
Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index bb7fc46a98cbd..661efb8684813 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -117,8 +117,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override;
-
- bool softPromoteHalfType() const override { return false; }
};
} // End namespace llvm;
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index a2da8876472ab..0a53b3a906fbe 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -6187,3 +6187,167 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace
store volatile i32 %after.offset, ptr addrspace(1) %out, align 4
ret void
}
+
+define amdgpu_kernel void @f16_arg(half %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: f16_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s6, s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: f16_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT: s_load_dword s2, s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_short v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: f16_arg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_short v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; EG-LABEL: f16_arg:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @8, KC0[], KC1[]
+; EG-NEXT: TEX 0 @6
+; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT: ALU clause starting at 8:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 9:
+; EG-NEXT: AND_INT T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: AND_INT * T1.W, T0.X, literal.y,
+; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
+; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
+; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; EG-NEXT: LSHL T0.X, T1.W, PV.W,
+; EG-NEXT: LSHL * T0.W, literal.x, PV.W,
+; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: f16_arg:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @8, KC0[], KC1[]
+; CM-NEXT: TEX 0 @6
+; CM-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT: ALU clause starting at 8:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 9:
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 3(4.203895e-45), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, T0.X, literal.x,
+; CM-NEXT: LSHL * T0.W, PV.W, literal.y,
+; CM-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
+; CM-NEXT: LSHL T0.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T0.W, literal.x, PV.W,
+; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
+; CM-NEXT: MOV T0.Y, 0.0,
+; CM-NEXT: MOV * T0.Z, 0.0,
+; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store half %arg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @v2f16_arg(<2 x half> %arg, ptr addrspace(1) %ptr) {
+; SI-LABEL: v2f16_arg:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s6, s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: v2f16_arg:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; VI-NEXT: s_load_dword s2, s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v2f16_arg:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dword s2, s[8:9], 0x0
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x8
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, s2
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; EG-LABEL: v2f16_arg:
+; EG: ; %bb.0:
+; EG-NEXT: ALU 0, @10, KC0[], KC1[]
+; EG-NEXT: TEX 1 @6
+; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 11:
+; EG-NEXT: LSHL * T0.W, T1.X, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: OR_INT T0.X, T0.X, PV.W,
+; EG-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: v2f16_arg:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 0, @10, KC0[], KC1[]
+; CM-NEXT: TEX 1 @6
+; CM-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 38, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 36, #3
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 11:
+; CM-NEXT: LSHL * T0.W, T1.X, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: OR_INT * T0.X, T0.X, PV.W,
+; CM-NEXT: LSHR * T1.X, KC0[2].Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ store <2 x half> %arg, ptr addrspace(1) %ptr
+ ret void
+}
|
|
Out of scope question: do we have plans to phase out R600 support in the compiler? |
|
Not really |
a2319dc to
8c1dd3a
Compare
074f0b4 to
febe138
Compare
8c1dd3a to
2fa99dc
Compare
🪟 Windows x64 Test Results
All executed tests passed, but another part of the build failed. Click on a failure below to see the details. [code=4294967295] bin/mlir-opt.exe If these failures are unrelated to your changes (for example, tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
As expected, the code is much worse, but more correct. We could do a better job with source modifier management around fp16_to_fp/fp_to_fp16.
Also includes a kind of hacky, minimal change to avoid assertions when softPromoteHalfType is removed to fix kernel arguments lowered as f16. Half support was never really implemented for r600, and there just happened to be a few incidental tests which included a half argument (which were also not even meaningful, since the function body just folded to nothing due to no callable function support).
2fa99dc to
34cfc39
Compare
febe138 to
c91ea57
Compare
This reverts commit 99fab01. llc was crashing in kernel-args.ll after this patch: ``` .---command stderr------------ | LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32 | In function: f16_arg | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug. | Stack dump: | 0. Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood | 1. Running pass 'Function Pass Manager' on module '<stdin>'. | 2. Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg' | #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438) | #1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75) | #2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0 | #3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050) | #4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec) | #5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2) | #6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472) | #7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005) | #8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7) | #9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d) | #10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94) | #11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a) | #12 
0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e) | #13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30) | #14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0) | #15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793) | #16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5) | #17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72) | #18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56) | #19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0 | #20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3) | #21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a) | #22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305) | #23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21) `----------------------------- ```
|
I've reverted this in 38b7176. |
Why did it pass the pre tests? |
| TargetLowering::AtomicExpansionKind | ||
| shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override; | ||
|
|
||
| bool softPromoteHalfType() const override { return false; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Somehow the actual change got lost here
This was the core change in #177420, which somehow got lost before submitting.
This was the core change in #177420, which somehow got lost before submitting.
This reverts commit 38b7176.
This was the core change in llvm#177420, which somehow got lost before submitting.
Also includes a kind of hacky, minimal change to avoid assertions when softPromoteHalfType is removed to fix kernel arguments lowered as f16. Half support was never really implemented for r600, and there just happened to be a few incidental tests which included a half argument (which were also not even meaningful, since the function body just folded to nothing due to no callable function support).
This reverts commit 99fab01. llc was crashing in kernel-args.ll after this patch: ``` .---command stderr------------ | LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32 | In function: f16_arg | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug. | Stack dump: | 0. Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood | 1. Running pass 'Function Pass Manager' on module '<stdin>'. | 2. Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg' | #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438) | llvm#1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75) | llvm#2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0 | llvm#3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050) | llvm#4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec) | llvm#5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2) | llvm#6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472) | llvm#7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005) | llvm#8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7) | llvm#9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d) | llvm#10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94) | llvm#11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() 
(/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a) | llvm#12 0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e) | llvm#13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30) | llvm#14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0) | llvm#15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793) | llvm#16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5) | llvm#17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72) | llvm#18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56) | llvm#19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0 | llvm#20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3) | llvm#21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a) | llvm#22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305) | llvm#23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21) `----------------------------- ```
This reverts commit 38b7176.
Also includes a kind of hacky, minimal change to avoid assertions when softPromoteHalfType is removed to fix kernel arguments lowered as f16. Half support was never really implemented for r600, and there just happened to be a few incidental tests which included a half argument (which were also not even meaningful, since the function body just folded to nothing due to no callable function support).
This reverts commit 99fab01. llc was crashing in kernel-args.ll after this patch: ``` .---command stderr------------ | LLVM ERROR: Cannot select: t3: f32,ch = load<(non-temporal dereferenceable invariant load (s16), align 4, addrspace 7), sext from f16> t0, Constant:i32<36>, undef:i32 | In function: f16_arg | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug. | Stack dump: | 0. Program arguments: /b/ml-opt-devrel-x86-64-b1/build/bin/llc -mtriple=r600 -mcpu=redwood | 1. Running pass 'Function Pass Manager' on module '<stdin>'. | 2. Running pass 'Unnamed pass: implement Pass::getPassName()' on function '@f16_arg' | #0 0x0000561402607438 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a7438) | llvm#1 0x0000561402604b75 llvm::sys::RunSignalHandlers() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x81a4b75) | llvm#2 0x00005614026081b1 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0 | llvm#3 0x00007f55eb45a050 (/lib/x86_64-linux-gnu/libc.so.6+0x3c050) | llvm#4 0x00007f55eb4a8eec (/lib/x86_64-linux-gnu/libc.so.6+0x8aeec) | llvm#5 0x00007f55eb459fb2 raise (/lib/x86_64-linux-gnu/libc.so.6+0x3bfb2) | llvm#6 0x00007f55eb444472 abort (/lib/x86_64-linux-gnu/libc.so.6+0x26472) | llvm#7 0x0000561402567005 llvm::report_fatal_error(llvm::Twine const&, bool) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x8107005) | llvm#8 0x00005614023e7ba7 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f87ba7) | llvm#9 0x00005614023e6a7d llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f86a7d) | llvm#10 0x00005614023dae94 llvm::SelectionDAGISel::DoInstructionSelection() (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f7ae94) | llvm#11 0x00005614023d9e6a llvm::SelectionDAGISel::CodeGenAndEmitDAG() 
(/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f79e6a) | llvm#12 0x00005614023d7b5e llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f77b5e) | llvm#13 0x00005614023d4c30 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f74c30) | llvm#14 0x00005614023d22e0 llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7f722e0) | llvm#15 0x0000561401611793 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x71b1793) | llvm#16 0x0000561401b790e5 llvm::FPPassManager::runOnFunction(llvm::Function&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x77190e5) | llvm#17 0x0000561401b80f72 llvm::FPPassManager::runOnModule(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7720f72) | llvm#18 0x0000561401b79b56 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x7719b56) | llvm#19 0x00005613ff4858f4 compileModule(char**, llvm::SmallVectorImpl<llvm::PassPlugin>&, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0 | llvm#20 0x00005613ff482ed3 main (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x5022ed3) | llvm#21 0x00007f55eb44524a (/lib/x86_64-linux-gnu/libc.so.6+0x2724a) | llvm#22 0x00007f55eb445305 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x27305) | llvm#23 0x00005613ff47ea21 _start (/b/ml-opt-devrel-x86-64-b1/build/bin/llc+0x501ea21) `----------------------------- ```
This reverts commit 38b7176.
This was the core change in llvm#177420, which somehow got lost before submitting.

Also includes a kind of hacky, minimal change to avoid assertions
when softPromoteHalfType is removed to fix kernel arguments
lowered as f16. Half support was never really implemented
for r600, and there just happened to be a few incidental tests
which included a half argument (which were also not even meaningful,
since the function body just folded to nothing due to no callable
function support).