ROCm · asroy · Jun 17, 2022 · Jun 1, 2022 · Jun 2, 2022 · Jun 6, 2022
@@ -1 +1,2 @@
+add_example_executable(example_gemm_bias_relu_add_layernorm_xdl_fp16 gemm_bias_relu_add_layernorm_xdl_fp16.cpp)
 add_example_executable(example_gemm_layernorm_xdl_fp16 gemm_layernorm_xdl_fp16.cpp)
@@ -48,6 +48,56 @@ using DeviceGemmReducePtr = std::unique_ptr<DeviceGemmReduce<DPtrsGlobal,
                                                              DxsInElementwiseOperation,
                                                              DxsAccElementwiseOperation>>;
 
+template <typename DPtrsGlobal,
+          typename AElementwiseOperation,
+          typename BElementwiseOperation,
+          typename CElementwiseOperation,
+          typename C1ElementwiseOperation,
+          typename DxsInElementwiseOperation,
+          typename DxsAccElementwiseOperation>
+struct DeviceGemmBiasAddReduce : public BaseOperator
+{
+    virtual std::unique_ptr<BaseArgument>
+    MakeArgumentPointer(const void* p_a,
+                        const void* p_b,
+                        void* p_c,
+                        const void* p_c0,
+                        const void* p_c1,
+                        DPtrsGlobal p_dxs,
+                        ck::index_t M,
+                        ck::index_t N,
+                        ck::index_t K,
+                        ck::index_t StrideA,
+                        ck::index_t StrideB,
+                        ck::index_t StrideC,
+                        ck::index_t StrideC1,
+                        AElementwiseOperation a_element_op,
+                        BElementwiseOperation b_element_op,
+                        CElementwiseOperation c_element_op,
+                        C1ElementwiseOperation c1_element_op,
+                        DxsInElementwiseOperation dxs_in_element_op,
+                        DxsAccElementwiseOperation dxs_out_element_op,
+                        ck::index_t BatchCount = 1) = 0;
+
+    virtual std::unique_ptr<BaseInvoker> MakeInvokerPointer() = 0;
+};
+
+template <typename DPtrsGlobal,
+          typename AElementwiseOperation,
+          typename BElementwiseOperation,
+          typename CElementwiseOperation,
+          typename C1ElementwiseOperation,
+          typename DxsInElementwiseOperation,
+          typename DxsAccElementwiseOperation>
+using DeviceGemmBiasAddReducePtr =
+    std::unique_ptr<DeviceGemmBiasAddReduce<DPtrsGlobal,
+                                            AElementwiseOperation,
+                                            BElementwiseOperation,
+                                            CElementwiseOperation,
+                                            C1ElementwiseOperation,
+                                            DxsInElementwiseOperation,
+                                            DxsAccElementwiseOperation>>;
+
 } // namespace device
 } // namespace tensor_operation
 } // namespace ck
@@ -144,6 +144,21 @@ struct AddHardswishAdd
     }
 };
 
+struct Relu
+{
+    __host__ __device__ void operator()(float& y, const float& x) const { y = x > 0 ? x : 0; }
+
+    __host__ __device__ void operator()(half_t& y, const half_t& x) const { y = x > 0 ? x : 0; }
+
+    __host__ __device__ void operator()(bhalf_t& y, const bhalf_t& x) const { y = x > 0 ? x : 0; }
+
+    __host__ __device__ void operator()(int32_t& y, const int32_t& x) const { y = x > 0 ? x : 0; }
+
+    __host__ __device__ void operator()(int8_t& y, const int8_t& x) const { y = x > 0 ? x : 0; }
+
+    __host__ __device__ void operator()(double& y, const double& x) const { y = x > 0 ? x : 0; }
+};
+
 struct Normalize
 {
     Normalize(float epsilon = 1e-4) : epsilon_(epsilon) {}
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		add_example_executable(example_gemm_bias_relu_add_layernorm_xdl_fp16 gemm_bias_relu_add_layernorm_xdl_fp16.cpp)
		add_example_executable(example_gemm_layernorm_xdl_fp16 gemm_layernorm_xdl_fp16.cpp)