Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions projects/rocblas/library/src/handle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,10 @@ Processor _rocblas_handle::getActiveArch()
{
return Processor::gfx900;
}
else if(deviceString.find("gfx90c") != std::string::npos)
{
return Processor::gfx90c;
}
else if(deviceString.find("gfx906") != std::string::npos)
{
return Processor::gfx906;
Expand Down
1 change: 1 addition & 0 deletions projects/rocblas/library/src/include/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ enum class Processor : int
// only including supported types
gfx803 = 803,
gfx900 = 900,
gfx90c = 912,
gfx906 = 906,
gfx908 = 908,
gfx90a = 910,
Expand Down
4 changes: 4 additions & 0 deletions projects/rocblas/library/src/tensile_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,10 @@ namespace
{
return Tensile::LazyLoadingInit::gfx900;
}
else if(deviceString.find("gfx90c") != std::string::npos)
{
return Tensile::LazyLoadingInit::gfx90c;
}
else if(deviceString.find("gfx906") != std::string::npos)
{
return Tensile::LazyLoadingInit::gfx906;
Expand Down
44 changes: 44 additions & 0 deletions shared/tensile/Tensile/AsmCaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,50 @@ def getCapabilitiesCache(rocmVersion: NamedTuple) -> dict:
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
(9, 0, 12): {'HasAddLshl': True,
'HasAtomicAdd': False,
'HasDirectToLdsDest': False,
'HasDirectToLdsNoDest': True,
'HasExplicitCO': True,
'HasExplicitNC': False,
'HasGLCModifier': True,
'HasNTModifier': False,
'HasLshlOr': True,
'HasMFMA': False,
'HasMFMA_b8': False,
'HasMFMA_bf16_1k': False,
'HasMFMA_bf16_original': False,
'HasMFMA_constSrc': False,
'HasMFMA_f64': False,
'HasMFMA_f8': False,
'HasMFMA_i8_908': False,
'HasMFMA_i8_940': False,
'HasMFMA_vgpr': False,
'HasMFMA_xf32': False,
'HasSMulHi': True,
'HasWMMA': False,
'KernargPreloading': False,
'MaxLgkmcnt': 15,
'MaxVmcnt': 63,
'SupportedISA': True,
'SupportedSource': True,
'VOP3v_dot4_i32_i8': False,
'v_dot2_f32_f16': False,
'v_dot2c_f32_f16': False,
'v_dot4_i32_i8': False,
'v_dot4c_i32_i8': False,
'v_fma_f16': True,
'v_fma_f32': True,
'v_fma_f64': True,
'v_fma_mix_f32': False,
'v_fmac_f16': False,
'v_fmac_f32': False,
'v_mac_f16': True,
'v_mac_f32': True,
'v_mad_mix_f32': True,
'v_mov_b64': False,
'v_pk_fma_f16': True,
'v_pk_fmac_f16': False},
(9, 4, 2): {'HasAddLshl': True,
'HasAtomicAdd': True,
'HasDirectToLdsDest': False,
Expand Down
4 changes: 2 additions & 2 deletions shared/tensile/Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class DeveloperWarning(Warning):

globalParameters["MaxFileName"] = 64 # If a file name would be longer than this, shorten it with a hash.
globalParameters["SupportedISA"] = [(8,0,3),
- (9,0,0), (9,0,6), (9,0,8), (9,0,10),
+ (9,0,0), (9,0,6), (9,0,8), (9,0,10), (9,0,12),
(9,4,2), (9,5,0),
(10,1,0), (10,1,1), (10,1,2), (10,3,0), (10,3,1), (10,3,2), (10,3,3), (10,3,4), (10,3,5), (10,3,6),
(11,0,0), (11,0,1), (11,0,2), (11,0,3),
Expand Down Expand Up @@ -317,7 +317,7 @@ class DeveloperWarning(Warning):
# Translate GPU targets to filter filenames in Tensile_LOGIC directory
architectureMap = {
'all':'_', 'gfx000':'none', 'fallback':'hip',
- 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10',
+ 'gfx803':'r9nano', 'gfx900':'vega10', 'gfx900:xnack-':'vega10', 'gfx90c':'vega10',
'gfx906':'vega20', 'gfx906:xnack+':'vega20', 'gfx906:xnack-':'vega20',
'gfx908':'arcturus','gfx908:xnack+':'arcturus', 'gfx908:xnack-':'arcturus',
'gfx90a':'aldebaran', 'gfx90a:xnack+':'aldebaran', 'gfx90a:xnack-':'aldebaran',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ if(NOT BUILD_ADDRESS_SANITIZER)
list(APPEND BASE_ARCHITECTURES
"gfx803"
"gfx900"
"gfx90c"
"gfx906"
"gfx908"
"gfx90a"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ namespace Tensile
//gfx802 = 3,
gfx803 = 803,
gfx900 = 900,
gfx90c = 912,
gfx906 = 906,
gfx908 = 908,
gfx90a = 910,
Expand Down Expand Up @@ -92,6 +93,8 @@ namespace Tensile
return "gfx803";
case AMDGPU::Processor::gfx900:
return "gfx900";
case AMDGPU::Processor::gfx90c:
return "gfx90c";
case AMDGPU::Processor::gfx906:
return "gfx906";
case AMDGPU::Processor::gfx908:
Expand Down Expand Up @@ -156,6 +159,10 @@ namespace Tensile
{
return AMDGPU::Processor::gfx900;
}
else if(deviceString.find("gfx90c") != std::string::npos)
{
return AMDGPU::Processor::gfx90c;
}
else if(deviceString.find("gfx906") != std::string::npos)
{
return AMDGPU::Processor::gfx906;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace Tensile
None,
gfx803,
gfx900,
gfx90c,
gfx906,
gfx908,
gfx90a,
Expand Down Expand Up @@ -80,6 +81,8 @@ namespace Tensile
return "TensileLibrary_*_gfx803";
case LazyLoadingInit::gfx900:
return "TensileLibrary_*_gfx900";
case LazyLoadingInit::gfx90c:
return "TensileLibrary_*_gfx90c";
case LazyLoadingInit::gfx906:
return "TensileLibrary_*_gfx906";
case LazyLoadingInit::gfx908:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ namespace Tensile
{
iot::enumCase(io, value, "gfx803", AMDGPU::Processor::gfx803);
iot::enumCase(io, value, "gfx900", AMDGPU::Processor::gfx900);
iot::enumCase(io, value, "gfx90c", AMDGPU::Processor::gfx90c);
iot::enumCase(io, value, "gfx906", AMDGPU::Processor::gfx906);
iot::enumCase(io, value, "gfx908", AMDGPU::Processor::gfx908);
iot::enumCase(io, value, "gfx90a", AMDGPU::Processor::gfx90a);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Here is the list of optional arguments for invoking the ``TensileCreateLibrary``
* - \-\-architecture=ARCHITECTURE
- Architectures to generate a library for. When specifying multiple options, use quoted and semicolon-delimited
architectures such as \-\-architecture='gfx908;gfx1012'.
- Supported architectures include: all; gfx000; gfx803; gfx900; gfx900:xnack-; gfx906; gfx906:xnack+; gfx906:xnack-; gfx908; gfx908:xnack+;
+ Supported architectures include: all; gfx000; gfx803; gfx900; gfx900:xnack-; gfx90c; gfx906; gfx906:xnack+; gfx906:xnack-; gfx908; gfx908:xnack+;
gfx908:xnack-; gfx90a; gfx90a:xnack+; gfx90a:xnack-; gfx940; gfx940:xnack+; gfx940:xnack-; gfx941; gfx941:xnack+;
gfx941:xnack-; gfx942; gfx942:xnack+; gfx942:xnack-; gfx1010; gfx1011; gfx1012; gfx1030; gfx1031; gfx1032; gfx1034; gfx1035;
gfx1100; gfx1101; gfx1102; gfx1103; gfx1150; gfx1151; gfx1152; gfx1153; gfx1200; gfx1201.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ if(NOT BUILD_ADDRESS_SANITIZER)
list(APPEND BASE_ARCHITECTURES
"gfx803"
"gfx900"
"gfx90c"
"gfx906"
"gfx908"
"gfx90a"
Expand Down
Loading