Skip to content

Commit 37b55f6

Browse files
authored
fix data type (tile-ai#1204)
1 parent 5ec8b20 commit 37b55f6

File tree

1 file changed

+2 / -2 lines changed

1 file changed

+2 / -2 lines changed

src/tl_templates/cuda/reduce.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ struct SharedReduceWarp {
7373

7474
unsigned mask = __activemask();
7575
for (int offset = kWarpSize / 2; offset > 0; offset >>= 1) {
76-
T other = __shfl_down_sync(mask, partial, offset);
76+
T other = tl::shfl_down_sync(mask, partial, offset);
7777
partial = Reducer()(partial, other);
7878
}
7979

@@ -159,7 +159,7 @@ template <int threads, bool reverse = false> struct CumSum1D {
159159

160160
#pragma unroll
161161
for (int off = 1; off < SEG; off <<= 1) {
162-
T n = (T)__shfl_down_sync(MASK, val, off);
162+
T n = (T)tl::shfl_down_sync(MASK, val, off);
163163
if (lane < SEG - off)
164164
val += n;
165165
}

0 commit comments

Comments
 (0)