Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions shared/tensile/Tensile/AsmCaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ def nested_update_(left, right):
(11, 0, 1): {'VOP3v_dot4_i32_i8': True},
(11, 0, 2): {'VOP3v_dot4_i32_i8': True},
(11, 0, 3): {'VOP3v_dot4_i32_i8': True},
(11, 5, 0): {'VOP3v_dot4_i32_i8': True},
(11, 5, 1): {'VOP3v_dot4_i32_i8': True},
(12, 0, 0): {'VOP3v_dot4_i32_i8': True},
(12, 0, 1): {'VOP3v_dot4_i32_i8': True}}
Expand Down
2 changes: 1 addition & 1 deletion shared/tensile/Tensile/Common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2464,7 +2464,7 @@ def assignGlobalParameters( config, capabilitiesCache: Optional[dict] = None ):
if os.name == "nt":
globalParameters["CurrentISA"] = (9,0,6)
printWarning("Failed to detect ISA so forcing (gfx906) on windows")
isasWithDisabledHWMonitor = ((9,4,2), (9,5,0), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (12,0,0), (12,0,1))
isasWithDisabledHWMonitor = ((9,4,2), (9,5,0), (11,0,0), (11,0,1), (11,0,2), (11,0,3), (11,5,0), (11,5,1), (12,0,0), (12,0,1))
if globalParameters["CurrentISA"] in isasWithDisabledHWMonitor:
isaString = ', '.join(map(gfxName, isasWithDisabledHWMonitor))
printWarning(f"HardwareMonitor currently disabled for {isaString}")
Expand Down
16 changes: 15 additions & 1 deletion shared/tensile/Tensile/Source/lib/include/Tensile/AMDGPU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ namespace Tensile
gfx1102 = 1102,
gfx1103 = 1103,
gfx1150 = 1150,
gfx1151 = 1151
gfx1151 = 1151,
gfx1200 = 1200,
gfx1201 = 1201
};

static std::string toString(Processor p)
Expand Down Expand Up @@ -124,6 +126,10 @@ namespace Tensile
return "gfx1150";
case AMDGPU::Processor::gfx1151:
return "gfx1151";
case AMDGPU::Processor::gfx1200:
return "gfx1200";
case AMDGPU::Processor::gfx1201:
return "gfx1201";
}
return "";
}
Expand Down Expand Up @@ -198,6 +204,14 @@ namespace Tensile
{
return AMDGPU::Processor::gfx1151;
}
else if(deviceString.find("gfx1200") != std::string::npos)
{
return AMDGPU::Processor::gfx1200;
}
else if(deviceString.find("gfx1201") != std::string::npos)
{
return AMDGPU::Processor::gfx1201;
}
else
{
return static_cast<AMDGPU::Processor>(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ namespace Tensile
gfx1103,
gfx1150,
gfx1151,
gfx1200,
gfx1201,
All
};

Expand Down Expand Up @@ -112,6 +114,10 @@ namespace Tensile
return "TensileLibrary_*_gfx1150";
case LazyLoadingInit::gfx1151:
return "TensileLibrary_*_gfx1151";
case LazyLoadingInit::gfx1200:
return "TensileLibrary_*_gfx1200";
case LazyLoadingInit::gfx1201:
return "TensileLibrary_*_gfx1201";
case LazyLoadingInit::None:
return "";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ namespace Tensile
iot::enumCase(io, value, "gfx1103", AMDGPU::Processor::gfx1103);
iot::enumCase(io, value, "gfx1150", AMDGPU::Processor::gfx1150);
iot::enumCase(io, value, "gfx1151", AMDGPU::Processor::gfx1151);
iot::enumCase(io, value, "gfx1200", AMDGPU::Processor::gfx1200);
iot::enumCase(io, value, "gfx1201", AMDGPU::Processor::gfx1201);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Here is the list of optional arguments for invoking the ``TensileCreateLibrary``
Supported architectures include: all; gfx000; gfx803; gfx900; gfx900:xnack-; gfx906; gfx906:xnack+; gfx906:xnack-; gfx908; gfx908:xnack+;
gfx908:xnack-; gfx90a; gfx90a:xnack+; gfx90a:xnack-; gfx940; gfx940:xnack+; gfx940:xnack-; gfx941; gfx941:xnack+;
gfx941:xnack-; gfx942; gfx942:xnack+; gfx942:xnack-; gfx1010; gfx1011; gfx1012; gfx1030; gfx1031; gfx1032; gfx1034; gfx1035;
gfx1100; gfx1101; gfx1102; gfx1103.
gfx1100; gfx1101; gfx1102; gfx1103; gfx1150; gfx1151; gfx1200; gfx1201.

* - \-\-build-client
- Builds the Tensile client executable that is used for standalone benchmarking. This option is set by default.
Expand Down