@@ -89,8 +89,14 @@ inline PrimExpr DispatchShuffle(const PrimExpr& e) {
8989 index = self + delta;
9090 index = Select ((self & (width - 1 )) + delta >= width, self, index);
9191 }
92+ // reinterpret var as int32 (no-op when var is already int32)
93+ bool is_int32 = var.dtype ().is_int () && var.dtype ().bits () == 32 ;
94+ PrimExpr source = is_int32 ? var : reinterpret (DataType::Int (32 ), var);
9295 PrimExpr res = Call (DataType::Int (32 ), builtin::call_pure_extern (),
93- {StringImm (" llvm.amdgcn.ds.bpermute" ), index << 2 , var});
96+ {StringImm (" llvm.amdgcn.ds.bpermute" ), index << 2 , source});
97+ if (!is_int32) {
98+ res = reinterpret (var.dtype (), res);
99+ }
94100 return res;
95101}
96102
@@ -114,73 +120,84 @@ TVM_REGISTER_OP("tir.tvm_warp_shuffle_down")
114120 .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchShuffle);
115121
116122TVM_REGISTER_OP (" tir.floor" )
117- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
123+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
124+ DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1 >);
118125
119126TVM_REGISTER_OP (" tir.ceil" )
120- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
127+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
128+ DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1 >);
121129
122130TVM_REGISTER_OP (" tir.round" )
123- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
131+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
132+ DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1 >);
124133
125134TVM_REGISTER_OP (" tir.nearbyint" )
126- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
135+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
136+ DispatchLLVMPureIntrin<::llvm::Intrinsic::nearbyint, 1 >);
127137
128138TVM_REGISTER_OP (" tir.trunc" )
129- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
139+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
140+ DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1 >);
130141
131142TVM_REGISTER_OP (" tir.fabs" )
132- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
143+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
144+ DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1 >);
133145
134- TVM_REGISTER_OP (" tir.exp" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic " ,
135- DispatchPureExternOCML );
146+ TVM_REGISTER_OP (" tir.exp" ).set_attr<FLowerIntrinsic>(
147+ " rocm.FLowerIntrinsic " , DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1 > );
136148
137149TVM_REGISTER_OP (" tir.exp2" )
138- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
150+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
151+ DispatchLLVMPureIntrin<::llvm::Intrinsic::exp2, 1 >);
139152
140- TVM_REGISTER_OP (" tir.exp10" )
141- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
153+ // TVM_REGISTER_OP("tir.exp10")
154+ // .set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic",
155+ // DispatchLLVMPureIntrin<::llvm::Intrinsic::exp10, 1>);
142156
143- TVM_REGISTER_OP (" tir.erf" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
144- DispatchPureExternOCML);
157+ // TVM_REGISTER_OP("tir.erf").set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic",
158+ // DispatchPureExternOCML);
145159
146160TVM_REGISTER_OP (" tir.fma" ).set_attr<FLowerIntrinsic>(
147161 " rocm.FLowerIntrinsic" , DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 3 >);
148162
149- TVM_REGISTER_OP (" tir.log" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic " ,
150- DispatchPureExternOCML );
163+ TVM_REGISTER_OP (" tir.log" ).set_attr<FLowerIntrinsic>(
164+ " rocm.FLowerIntrinsic " , DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1 > );
151165
152166TVM_REGISTER_OP (" tir.log2" )
153- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
167+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
168+ DispatchLLVMPureIntrin<::llvm::Intrinsic::log2, 1 >);
154169
155170TVM_REGISTER_OP (" tir.log10" )
156- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
171+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
172+ DispatchLLVMPureIntrin<::llvm::Intrinsic::log10, 1 >);
157173
158174TVM_REGISTER_OP (" tir.sqrt" )
159- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
175+ .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
176+ DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1 >);
160177
161- TVM_REGISTER_OP (" tir.pow" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic " ,
162- DispatchPureExternOCML );
178+ TVM_REGISTER_OP (" tir.pow" ).set_attr<FLowerIntrinsic>(
179+ " rocm.FLowerIntrinsic " , DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 2 > );
163180
164- TVM_REGISTER_OP (" tir.tanh" )
165- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
181+ // TVM_REGISTER_OP("tir.tanh")
182+ // .set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic", DispatchPureExternOCML);
166183
167- TVM_REGISTER_OP (" tir.tan" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" ,
168- DispatchPureExternOCML);
184+ // TVM_REGISTER_OP("tir.tan").set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic",
185+ // DispatchPureExternOCML);
169186
170- TVM_REGISTER_OP (" tir.cos" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic " ,
171- DispatchPureExternOCML );
187+ TVM_REGISTER_OP (" tir.cos" ).set_attr<FLowerIntrinsic>(
188+ " rocm.FLowerIntrinsic " , DispatchLLVMPureIntrin<::llvm::Intrinsic::cos, 1 > );
172189
173- TVM_REGISTER_OP (" tir.cosh" )
174- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
190+ // TVM_REGISTER_OP("tir.cosh")
191+ // .set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic", DispatchPureExternOCML);
175192
176- TVM_REGISTER_OP (" tir.sin" ).set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic " ,
177- DispatchPureExternOCML );
193+ TVM_REGISTER_OP (" tir.sin" ).set_attr<FLowerIntrinsic>(
194+ " rocm.FLowerIntrinsic " , DispatchLLVMPureIntrin<::llvm::Intrinsic::sin, 1 > );
178195
179- TVM_REGISTER_OP (" tir.sinh" )
180- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
196+ // TVM_REGISTER_OP("tir.sinh")
197+ // .set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic", DispatchPureExternOCML);
181198
182- TVM_REGISTER_OP (" tir.atan" )
183- .set_attr<FLowerIntrinsic>(" rocm.FLowerIntrinsic" , DispatchPureExternOCML);
199+ // TVM_REGISTER_OP("tir.atan")
200+ // .set_attr<FLowerIntrinsic>("rocm.FLowerIntrinsic", DispatchPureExternOCML);
184201
185202} // namespace llvm
186203} // namespace codegen
0 commit comments