Skip to content
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
49a55cb
conv:tf32:add all instances
yingluAMD Nov 18, 2025
8e582f0
refact 3d grouped instances code
yingluAMD Nov 18, 2025
23cabd1
add 3dGrouped Bwd/Wrw unitests
yingluAMD Nov 18, 2025
27dd838
add instances for conv grouped(f/b/w)
yingluAMD Nov 19, 2025
ecdcbcb
fix clang-format
yingluAMD Nov 19, 2025
c6bd09e
fix clang-format
yingluAMD Nov 19, 2025
10944e4
fix clang-format
yingluAMD Nov 19, 2025
3b0d76c
fix word errors
yingluAMD Nov 19, 2025
ab5012c
fix word errors
yingluAMD Nov 19, 2025
c2c6043
disable ck bf16/tf32 cases on gfx90a
yingluAMD Nov 20, 2025
0efc849
disable ck bf16/tf32 cases on gfx90a
yingluAMD Nov 20, 2025
d592d71
disable ck bf16/tf32 cases on gfx90a
yingluAMD Nov 20, 2025
8acadb0
code review
yingluAMD Nov 24, 2025
7d72398
Merge branch 'develop' into tf32_instances
yingluAMD Dec 2, 2025
12c5bd4
Revert "Revert "MIOpen:feature:tf32:demonstrate tf32 in conv3d on MI3…
yingluAMD Dec 2, 2025
0b07e5b
bug fix
yingluAMD Dec 2, 2025
3cb1083
bug fix
yingluAMD Dec 2, 2025
bb5d33f
add gfx950 as supported device
yingluAMD Dec 2, 2025
496c639
fix clang-format fail
yingluAMD Dec 2, 2025
c7f1d1b
Merge branch 'develop' into tf32_instances
yingluAMD Dec 11, 2025
008d648
refine unit solver test
yingluAMD Dec 12, 2025
6b2d26b
fix review comments
yingluAMD Dec 12, 2025
786dba9
refeine test code
yingluAMD Dec 12, 2025
a309d7d
update jenkins file
yingluAMD Dec 14, 2025
0a9ce60
Merge branch 'develop' into tf32_instances
yingluAMD Dec 15, 2025
af1f31b
bug fix
yingluAMD Dec 15, 2025
3b7fd51
improment ci
yingluAMD Dec 15, 2025
9ada135
move IsTF32Supported to target properties.hpp
yingluAMD Dec 17, 2025
9c9de81
Merge commit 'f0ecbb525ec' into tf32_instances
yingluAMD Dec 19, 2025
e35745e
Merge branch 'develop' into tf32_instances
BradPepersAMD Dec 20, 2025
12393f2
change 3D test to new style
yingluAMD Dec 22, 2025
2339bd8
fix clang-format
yingluAMD Dec 22, 2025
422ce39
Merge branch 'develop' into tf32_instances
yingluAMD Dec 22, 2025
d74ecf1
decrease problem size to meet threshold
yingluAMD Dec 22, 2025
c9140ee
refine 3DGroupWrw test cases
yingluAMD Dec 22, 2025
0c97f59
Merge branch 'develop' into tf32_instances
yingluAMD Dec 23, 2025
c1bda69
fix applicability
yingluAMD Dec 23, 2025
303e360
Merge branch 'develop' into tf32_instances
yingluAMD Jan 12, 2026
8384fa5
fix merge issues
yingluAMD Jan 13, 2026
2230980
Merge branch 'develop' into tf32_instances
yingluAMD Jan 15, 2026
c0c687a
Merge branch 'develop' into tf32_instances
yingluAMD Jan 19, 2026
c381273
fix merge issues
yingluAMD Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions projects/miopen/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ pipeline {
name: "DATATYPE_FP32",
defaultValue: true,
description: "")
booleanParam(
name: "DATATYPE_TF32",
defaultValue: true,
description: "")
booleanParam(
name: "DATATYPE_FP16",
defaultValue: true,
Expand Down Expand Up @@ -199,6 +203,7 @@ pipeline {
Bf16_flags = " -DMIOPEN_TEST_BFLOAT16=On"
Int8_flags = " -DMIOPEN_TEST_INT8=On"
Full_test = " -DMIOPEN_TEST_ALL=On"
Tf32_flags = " -DMIOPEN_TEST_TF32=On"

gfx908_flags = " -DMIOPEN_INSTALL_GPU_DATABASES=gfx908"
gfx90a_flags = " -DMIOPEN_INSTALL_GPU_DATABASES=gfx90a"
Expand Down Expand Up @@ -499,6 +504,24 @@ pipeline {
}
}
}
stage('TF32 Hip Install All gfx942') {
when {
beforeAgent true
expression { params.TARGET_GFX942 && params.DATATYPE_TF32 }
}
options {
retry(2)
}
agent{ label rocmnode("gfx942") }
steps{
runBuildAndSingleGtestJob(Full_test + Tf32_flags + gfx942_flags, Build_timeout_minutes)
}
post {
always {
cleanWs()
}
}
}
// GFX1101 Tests
stage('Fp16 Hip Install All gfx1101') {
when {
Expand Down
19 changes: 19 additions & 0 deletions projects/miopen/driver/conv_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,13 @@ class ConvDriver : public Driver
constexpr bool is_bfp8 = std::is_same<Tgpu, bfloat8_fnuz>::value;
if(is_bfp8 || is_fp8 || TensorsCasted())
tolerance *= 37.0;

{ // tf32 has same mantissa length as fp16
auto math_type_ = inflags.GetValueInt("math_type");
if(std::is_same_v<Tgpu, float> &&
(miopen::EnvEnableTF32() || (math_type_ == miopenMathDefault)))
tolerance = 8.2e-3;
}
return tolerance;
}

Expand Down Expand Up @@ -864,6 +871,8 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
warmupConvDesc,
static_cast<int>(miopenConvolutionFindModeNormal)); // Repeat via hidden API.
miopenSetConvolutionGroupCount(warmupConvDesc, group_count);
miopenSetConvolutionAttribute(
warmupConvDesc, MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE, inflags.GetValueInt("math_type"));

int warmup_out_len_size = miopen::deref(warmupInputTensor).GetNumDims();
std::vector<int> warmup_out_len(warmup_out_len_size);
Expand Down Expand Up @@ -1018,6 +1027,8 @@ int ConvDriver<Tgpu, Tref>::AddCmdLineArgs()
"0",
"MIOpen tuning policy (Default=0, or no tuning policy set)",
"int");
// TODO:(LYM) change back to 0 when TF32 is fully supported
inflags.AddInputFlag("math_type", 'M', "1", "math type of compute (Default=1)", "int");

return 0;
}
Expand Down Expand Up @@ -1222,6 +1233,14 @@ int ConvDriver<Tgpu, Tref>::SetConvDescriptorFromCmdLineArgs()
miopenSetTransposeConvNdOutputPadding(convDesc, spatial_dim, trans_output_pads.data());
}

auto math_type_ = inflags.GetValueInt("math_type");
if(math_type_ < miopenMathDefault || math_type_ > miopenMathPedantic)
{
std::cout << "Invalid math_type value: " << math_type_ << std::endl;
exit(0); // NOLINT (concurrency-mt-unsafe)
}
miopenSetConvolutionAttribute(convDesc, MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE, math_type_);

return miopenStatusSuccess;
}

Expand Down
11 changes: 11 additions & 0 deletions projects/miopen/include/miopen/miopen.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,14 @@ typedef enum
miopenStatusVersionMismatch = 10, /*!< Version mismatch of the supplied binary data argment. */
} miopenStatus_t;

typedef enum
{
// TODO:(LYM) temporary use Pedantic as default until TF32 is fully supported
miopenMathDefault = 0, /*!< Use TF32 if possible */
miopenMathPedantic =
1, /*!< Default MathType. Strict IEEE compliance. Don't allow datatype down conversion. */
} miopenMathType_t;

#ifdef MIOPEN_BETA_API
typedef enum
{
Expand Down Expand Up @@ -639,6 +647,9 @@ typedef enum
#else
// miopenReserved1 = 2,
#endif
// TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE =
3, /*!< refer to miopenMathType_t,default is miopenMathPedantic >*/
} miopenConvolutionAttrib_t;

/*! @ingroup convolutions
Expand Down
3 changes: 3 additions & 0 deletions projects/miopen/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,9 @@ if(MIOPEN_USE_COMPOSABLEKERNEL)
# Use the aliased targets when we pull CK from /opt/rocm or other place on disc.
set(MIOPEN_CK_LINK_FLAGS composable_kernel::device_conv_operations hip::host)
endif()
if(GPU_TARGETS MATCHES "gfx942" OR GPU_TARGETS MATCHES "gfx950")
target_compile_definitions(MIOpen PRIVATE CK_ENABLE_TF32)
endif()
endif()

if(WIN32)
Expand Down
27 changes: 24 additions & 3 deletions projects/miopen/src/conv/problem_description.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <miopen/datatype.hpp>
#include <miopen/execution_context.hpp>
#include <miopen/tensor_layout.hpp>
#include <miopen/solver/static_ck_common.hpp>

#include <sstream>

Expand Down Expand Up @@ -215,9 +216,14 @@ void ProblemDescription::MakeNetworkConfig(std::string& conf_key) const
ss << 'x' << GetWeightsLayout();
ss << 'x' << GetOutLayout();
}
ss << 'x' << EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
const auto data_type =
EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
ss << 'x' << data_type;

std::ostringstream optional;
if(data_type == "FP32" && UseTF32())
optional << "TF32" << 'x';

if(const auto ct = GetInCastType())
optional << "ci" << GetDataTypeName(*ct);
if(const auto ct = GetWeightsCastType())
Expand Down Expand Up @@ -271,10 +277,12 @@ void ProblemDescription::Serialize(std::ostream& stream) const
stream << sep << GetWeightsLayout();
stream << sep << GetOutLayout();
}
stream << sep << EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
// clang-format on
const auto data_type =
EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
stream << sep << data_type;
stream << sep << GetDirectionStr();

// clang-format on
// New performance config entries shall come into variable/optional part of db key.
// This is to support backward compatibility with previous versions of databases.
std::ostringstream optional;
Expand All @@ -290,6 +298,10 @@ void ProblemDescription::Serialize(std::ostream& stream) const
if(const auto ct = GetOutCastType())
optional << "_co" << GetDataTypeName(*ct);

// cx indicates compute datatype
if(data_type == "FP32" && UseTF32())
optional << "_cxTF32";

SerializeStrides(optional, in, out, weights, sep);
}
if(!optional.str().empty())
Expand Down Expand Up @@ -350,6 +362,15 @@ void ProblemDescription::SetupFloats(ExecutionContext& ctx) const
<< "x" << GetDataTypeName(GetOutDataType()));
}

void ProblemDescription::SetupComputeType(const ExecutionContext& ctx) const
{
if(miopen::solver::static_ck::IsTF32Supported(ctx.GetStream().GetDeviceName()) &&
Comment thread
yingluAMD marked this conversation as resolved.
Outdated
conv.EnableTF32())
{
use_tf32 = true;
}
}

std::string ProblemDescription::ComputeLayout(const TensorDescriptor& td) const
{
return td.GetLayout_str();
Expand Down
3 changes: 2 additions & 1 deletion projects/miopen/src/conv/solver_finders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,8 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,

MIOPEN_THROW_IF(elapsed <= 0, "Invalid elapsed time detected in EvaluateInvokers");

MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") << best);
MIOPEN_LOG_I("solution(current vs best):" << sol << ": " << elapsed
<< (elapsed < best ? " < " : " >= ") << best);
if(elapsed < best)
{
best = elapsed;
Expand Down
24 changes: 24 additions & 0 deletions projects/miopen/src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,16 @@ std::size_t ConvolutionDescriptor::GetWorkSpaceSize(ExecutionContext ctx,
return workspace_size;
}

bool ConvolutionDescriptor::EnableTF32() const
{
// TODO:(LYM) change back to && when TF32 is fully supported
if((miopen::EnvEnableTF32() ||
Comment thread
yingluAMD marked this conversation as resolved.
(static_cast<miopenMathType_t>(attribute.Get(MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)) ==
miopenMathDefault)))
return true;
return false;
}
Comment thread
yingluAMD marked this conversation as resolved.

std::ostream& operator<<(std::ostream& stream, const ConvolutionDescriptor& c)
{
stream << "conv" << c.spatialDim << "d, ";
Expand Down Expand Up @@ -540,6 +550,18 @@ void ConvolutionAttribute::Set(miopenConvolutionAttrib_t attr, int value)
}
fp8rounding_mode.rounding_mode = rounding_mode;
}
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)
{
const auto math_type_ = static_cast<miopenMathType_t>(value);
if(math_type_ != miopenMathDefault && math_type_ != miopenMathPedantic)
{
MIOPEN_THROW(miopenStatusBadParm,
"[Set conv attribute] Error: Attempt to set invalid value for "
"MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE: " +
std::to_string(value));
}
math_type.value = math_type_;
}
else
{
MIOPEN_THROW(miopenStatusBadParm,
Expand All @@ -556,6 +578,8 @@ int ConvolutionAttribute::Get(miopenConvolutionAttrib_t attr) const
return static_cast<int>(fp8rounding_mode.rounding_mode);
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC)
return deterministic.value;
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)
return math_type.value;
MIOPEN_THROW(miopenStatusBadParm,
"[Get conv attribute] Error: Attribute [" +
std::to_string(static_cast<int>(attr)) + "] does not exist.");
Expand Down
3 changes: 3 additions & 0 deletions projects/miopen/src/convolution_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ static inline auto MakeFwdCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand All @@ -86,6 +87,7 @@ static inline auto MakeBwdCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand All @@ -111,6 +113,7 @@ static inline auto MakeWrWCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
return GetInCastType() || GetWeightsCastType() || GetOutCastType();
}

bool UseTF32() const { return use_tf32; }

// To be used in Solvers that do not implement ALT FP16 kernels.
// Those Solvers must be non-applicable for gfx90a when this function returns true.
bool IsGfx90aFp16altRequired() const
Expand Down Expand Up @@ -395,6 +397,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
self.GetInDataType(), self.GetWeightsDataType(), self.GetOutDataType());
f(data_type, "data_type");
f(self.GetDirectionStr(), "direction");
if(data_type == "FP32" && self.UseTF32())
f("TF32", "compute_datatype");
}

template <class Self, class Visitor>
Expand All @@ -406,6 +410,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
}

void SetupFloats(ExecutionContext& ctx) const;
void SetupComputeType(const ExecutionContext& ctx) const;

private:
std::string ComputeLayout(const TensorDescriptor& td) const;
Expand All @@ -425,6 +430,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
Scalar alpha = Scalar(1.0);
Scalar beta = Scalar(0.0);
miopenAlphaBetaCase_t alpha_beta_case = DEFAULT;
mutable bool use_tf32 = false;
};

} // namespace conv
Expand Down
Loading