Skip to content

Commit f4562b9

Browse files
committed
rocm sdot4 works
1 parent 6cc6280 commit f4562b9

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

python/tvm/topi/cuda/tensor_intrin.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,11 @@ def _instr(index):
71 71
vec_y = yy.vload(0, dtype=vec_y_dtype)
72 72
prev_z = 0 if index == 0 else zz.vload(0)
73 73

74-
new_z = tvm.tir.call_pure_extern(zz_dtype, "__dp4a", vec_x, vec_y, prev_z)
74+
# new_z = tvm.tir.call_pure_extern(zz_dtype, "__dp4a", vec_x, vec_y, prev_z)
75+
new_z = tvm.tir.call_llvm_pure_intrin(zz_dtype, "llvm.amdgcn.sdot4", tvm.tir.const(4, "uint32"),
76+
tvm.tir.call_intrin("int32", "tir.reinterpret", vec_x),
77+
tvm.tir.call_intrin("int32", "tir.reinterpret", vec_y),
78+
prev_z, True)
75 79
ib.emit(zz.vstore(0, new_z))
76 80

77 81
return ib.get()

0 commit comments

Comments
 (0)