Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions aiter/ops/mha.py
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,8 @@ def psskddv():
return ret

# basic
ret = alibi_slopes is None
ret = get_gfx() == "gfx942"
ret &= alibi_slopes is None
ret &= bias is None
ret &= dbias is None
ret &= dropout_p == 0.0
Expand Down Expand Up @@ -2035,7 +2036,8 @@ def psskddv():

def can_impl_fmha_v3_bwd():
# basic
ret = alibi_slopes is None
ret = get_gfx() == "gfx942"
ret &= alibi_slopes is None
# ret &= bias is None
# ret &= dbias is None
ret &= dropout_p == 0.0
Expand Down
35 changes: 21 additions & 14 deletions csrc/cpp_itfs/mha_bwd_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,24 +101,31 @@

# C++ statement text that calls fmha_bwd(...) and assigns the result to `t`
# (the name suggests this is the v2 code path -- confirm against the template
# that consumes it).
V2_API = "t = fmha_bwd(traits, args, stream_config);"

# C++ snippet that selects the fmha_bwd_v3 implementation from the value of
# get_gpu_arch(): "gfx942" and "gfx950" each call the function in their own
# namespace; any other value prints "No supported GPU arch found!" and
# returns -1 from the enclosing C++ function.
V3_MULTI_TARGET_API = """
if (get_gpu_arch() == "gfx942") {
t = gfx942::fmha_bwd_v3(traits, args, stream_config, seqlen_q_padded, seqlen_k_padded, is_v3_api_check);
} else if (get_gpu_arch() == "gfx950") {
t = gfx950::fmha_bwd_v3(traits, args, stream_config, seqlen_q_padded, seqlen_k_padded, is_v3_api_check);
} else {
std::cout << "No supported GPU arch found!" << std::endl;
return -1;
}
"""


def get_v3_api():
    """Return the C++ snippet that invokes the v3 backward kernel.

    The snippet depends on the build targets reported by ``get_gfx_list()``:

    * no v3-capable target (neither ``gfx942`` nor ``gfx950``): an empty
      string, i.e. no v3 call is generated;
    * exactly one build target, and it is v3-capable: a direct call into that
      architecture's namespace;
    * otherwise: a braced block that reads ``get_gpu_arch()`` once and calls
      the matching namespace for each v3-capable target. If the runtime
      architecture matches none of them, ``t`` is left untouched.

    Returns:
        str: C++ source text assigning the kernel result to ``t``, or ``""``.
    """
    v3_call = "fmha_bwd_v3(traits, args, stream_config, seqlen_q_padded, seqlen_k_padded, is_v3_api_check)"
    gfx_list = get_gfx_list()
    # Keep a fixed order so the generated code is deterministic.
    v3_arch_list = [arch for arch in ("gfx942", "gfx950") if arch in gfx_list]

    if not v3_arch_list:
        return ""  # no v3 support
    if len(gfx_list) == 1:
        # The only build target is necessarily the single v3-capable arch,
        # so no runtime dispatch is needed.
        return f"t = {v3_arch_list[0]}::{v3_call};"

    # Several build targets: dispatch on the architecture detected at runtime.
    branches = [
        f'    if (gpu_arch == "{arch}") {{ t = {arch}::{v3_call}; }}'
        for arch in v3_arch_list
    ]
    return "\n".join(
        ["{", "    const std::string gpu_arch = get_gpu_arch();", *branches, "}"]
    )


# Computed once when this statement runs; the C++ text stored here reflects
# whatever get_gfx_list() reported at that moment.
V3_API = get_v3_api()
Expand Down
35 changes: 21 additions & 14 deletions csrc/cpp_itfs/mha_fwd_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,24 +163,31 @@

# C++ statement text that calls fmha_fwd(...) and assigns the result to `t`
# (the name suggests this is the v2 code path -- confirm against the template
# that consumes it).
V2_API = """t = fmha_fwd(traits, args, stream_config);"""

# C++ snippet that selects the fmha_fwd_v3 implementation from the value of
# get_gpu_arch(): "gfx942" and "gfx950" each call the function in their own
# namespace; any other value prints "No supported GPU arch found!" and
# returns -1 from the enclosing C++ function.
V3_MULTI_TARGET_API = """
if (get_gpu_arch() == "gfx942") {
t = gfx942::fmha_fwd_v3(traits, args, stream_config, seqstart_q_padding_ptr, seqstart_k_padding_ptr, is_v3_api_check);
} else if (get_gpu_arch() == "gfx950") {
t = gfx950::fmha_fwd_v3(traits, args, stream_config, seqstart_q_padding_ptr, seqstart_k_padding_ptr, is_v3_api_check);
} else {
std::cout << "No supported GPU arch found!" << std::endl;
return -1;
}
"""


def get_v3_api():
    """Return the C++ snippet that invokes the v3 forward kernel.

    The snippet depends on the build targets reported by ``get_gfx_list()``:

    * no v3-capable target (neither ``gfx942`` nor ``gfx950``): an empty
      string, i.e. no v3 call is generated;
    * exactly one build target, and it is v3-capable: a direct call into that
      architecture's namespace;
    * otherwise: a braced block that reads ``get_gpu_arch()`` once and calls
      the matching namespace for each v3-capable target. If the runtime
      architecture matches none of them, ``t`` is left untouched.

    Returns:
        str: C++ source text assigning the kernel result to ``t``, or ``""``.
    """
    v3_call = "fmha_fwd_v3(traits, args, stream_config, seqstart_q_padding_ptr, seqstart_k_padding_ptr, is_v3_api_check)"
    gfx_list = get_gfx_list()
    # Keep a fixed order so the generated code is deterministic.
    v3_arch_list = [arch for arch in ("gfx942", "gfx950") if arch in gfx_list]

    if not v3_arch_list:
        return ""  # no v3 support
    if len(gfx_list) == 1:
        # The only build target is necessarily the single v3-capable arch,
        # so no runtime dispatch is needed.
        return f"t = {v3_arch_list[0]}::{v3_call};"

    # Several build targets: dispatch on the architecture detected at runtime.
    branches = [
        f'    if (gpu_arch == "{arch}") {{ t = {arch}::{v3_call}; }}'
        for arch in v3_arch_list
    ]
    return "\n".join(
        ["{", "    const std::string gpu_arch = get_gpu_arch();", *branches, "}"]
    )


# Computed once when this statement runs; the C++ text stored here reflects
# whatever get_gfx_list() reported at that moment.
V3_API = get_v3_api()
Expand Down