Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Added launch bounds to the reduce kernels (#16397)
Browse the repository at this point in the history
* Added launch bounds to the reduce_kernel_M1

* Trigger CI

* Reretrigger the CI
  • Loading branch information
ptrendx authored and DickJC123 committed Oct 31, 2019
1 parent aa1074d commit 979e610
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/operator/tensor/broadcast_reduce-inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ __global__ void reduce_lines_kernel(const int N, const int M, const bool addto,
}

template<typename Reducer, int ndim, typename AType, typename DType, typename OType, typename OP>
__launch_bounds__(kMaxThreadsPerBlock)
__global__ void reduce_kernel_M1(const int N, const bool addto,
const DType* __restrict big, OType *small, const Shape<ndim> bshape,
const Shape<ndim> sshape) {
Expand All @@ -277,6 +278,7 @@ __global__ void reduce_kernel_M1(const int N, const bool addto,
}

template<typename Reducer, int ndim, typename DType, typename OP1, typename OP2>
__launch_bounds__(kMaxThreadsPerBlock)
__global__ void reduce_kernel_M1(const int N, const bool addto,
const DType* __restrict big,
const DType* __restrict lhs,
Expand Down

0 comments on commit 979e610

Please sign in to comment.