We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent da56c89 commit de91c5cCopy full SHA for de91c5c
python/tvm/dlight/gpu/gemv.py
@@ -469,7 +469,10 @@ def apply(
469
TS, TR = 2, 64
470
elif target.kind.name == "rocm":
471
VEC_C = 4
472
- LOAD_V_SHARED = True
+ # TODO: set LOAD_V_SHARED = False for now
473
+ # ROCm might have issues when loads and stores to shared memory do not use the same data type
474
+ # and only works for certain vector lengths; our commonly used vector lengths are in 4
475
+ LOAD_V_SHARED = False
476
LOAD_V_VEC = 8
477
UNROLL = 256
478
if isinstance(len_S, int):
0 commit comments