Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
5f7f803
feature:tf32:demostrate tf32 in conv3d on MI30X platform
yingluAMD Sep 1, 2025
d7d5967
add env&&config joint control
yingluAMD Sep 12, 2025
70be02e
allow fallback to fp32 if one kernel don't support tf32
yingluAMD Sep 15, 2025
39f8bcd
fix unused args
yingluAMD Sep 30, 2025
1b5e886
fix conv test fail
yingluAMD Sep 30, 2025
74201e2
Merge branch 'develop' into conv_tf32_poc
yingluAMD Oct 9, 2025
687f4bf
fix conv naive kernel name error
yingluAMD Oct 10, 2025
625c818
enhance conv test float threshold
yingluAMD Oct 11, 2025
3726c7e
fix clang-format fail
yingluAMD Oct 14, 2025
82c6598
fix clang-format fail
yingluAMD Oct 14, 2025
9b5d9e6
tf32 use same threshold as fp16
yingluAMD Oct 15, 2025
8868b5c
fix another threshold issue
yingluAMD Oct 15, 2025
c7a47ee
try CK bump to fix db_sync test fail
yingluAMD Oct 16, 2025
ae02a18
Merge branch 'develop' into conv_tf32_poc
yingluAMD Oct 16, 2025
92d5ebe
Merge branch 'develop' into conv_tf32_poc
yingluAMD Oct 16, 2025
12b22cd
set fp32 as default rather than tf32 to fix db_sync fail
yingluAMD Oct 17, 2025
a0ff6dc
fix clang-format fail
yingluAMD Oct 17, 2025
c5a5c88
separate tf32 as new datatype in db key
yingluAMD Oct 17, 2025
a2dbed7
Revert "set fp32 as default rather than tf32 to fix db_sync fail"
yingluAMD Oct 17, 2025
e60f994
change problem tf32 set to math_type
yingluAMD Oct 17, 2025
23a9b39
Revert "Revert "set fp32 as default rather than tf32 to fix db_sync f…
yingluAMD Oct 20, 2025
81fbc09
separate tf32 as computedattype key
yingluAMD Oct 20, 2025
462c581
set tf32 fallback in solver level
yingluAMD Oct 22, 2025
cb87f32
fix clang-format fail
yingluAMD Oct 22, 2025
d1b7dcd
Merge branch 'develop' into conv_tf32_poc
yingluAMD Oct 23, 2025
80617ce
fix merge fail
yingluAMD Oct 23, 2025
c134d2b
fix clang-format fail
yingluAMD Oct 23, 2025
ee4319f
resolve review comments
yingluAMD Oct 24, 2025
7c202ea
Revert "enhance conv test float threshold"
yingluAMD Oct 24, 2025
88fc1a0
Merge branch 'develop' into conv_tf32_poc
yingluAMD Oct 27, 2025
e04cc32
fix clang-format fail
yingluAMD Oct 27, 2025
4ddca21
test:add tf32 at groupconv tests
yingluAMD Oct 27, 2025
6737ee3
fix GTest name checker:Name fail
yingluAMD Oct 27, 2025
a1ce410
add tf32 test SKIP on non-gfx942 devices
yingluAMD Oct 28, 2025
5243f30
add tf32 in jenkins
yingluAMD Oct 28, 2025
1604476
add Conv3DGroupFwd solver test
yingluAMD Oct 29, 2025
2eb8034
add SetupComputeType in tests
yingluAMD Oct 29, 2025
2f62649
Merge branch 'develop' into conv_tf32_poc
yingluAMD Nov 4, 2025
c90cb06
Merge branch 'develop' into conv_tf32_poc
yingluAMD Nov 14, 2025
12e13a9
bug:add tf32 in test filter
yingluAMD Nov 14, 2025
ffffa9a
fix:delete new unvalidated case
yingluAMD Nov 14, 2025
f595904
Merge branch 'develop' into conv_tf32_poc
yingluAMD Nov 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions projects/miopen/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,10 @@ pipeline {
name: "DATATYPE_FP32",
defaultValue: true,
description: "")
booleanParam(
name: "DATATYPE_TF32",
defaultValue: true,
description: "")
booleanParam(
name: "DATATYPE_FP16",
defaultValue: true,
Expand Down Expand Up @@ -199,6 +203,7 @@ pipeline {
Bf16_flags = " -DMIOPEN_TEST_BFLOAT16=On"
Int8_flags = " -DMIOPEN_TEST_INT8=On"
Full_test = " -DMIOPEN_TEST_ALL=On"
Tf32_flags = " -DMIOPEN_TEST_TF32=On"

gfx908_flags = " -DMIOPEN_INSTALL_GPU_DATABASES=gfx908"
gfx90a_flags = " -DMIOPEN_INSTALL_GPU_DATABASES=gfx90a"
Expand Down Expand Up @@ -526,6 +531,24 @@ pipeline {
runBuildAndSingleGtestJob(Full_test + gfx1101_flags, Build_timeout_minutes)
}
}
stage('TF32 Hip All gfx942') {
when {
beforeAgent true
expression { params.TARGET_GFX942 && params.DATATYPE_TF32 }
}
options {
retry(2)
}
agent{ label rocmnode("gfx942") }
steps{
runBuildAndSingleGtestJob(Full_test + Tf32_flags + gfx942_flags, Build_timeout_minutes)
}
post {
always {
cleanWs()
}
}
}
}
}
stage("Nightly Tests") {
Expand Down
18 changes: 18 additions & 0 deletions projects/miopen/driver/conv_driver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,12 @@ class ConvDriver : public Driver
constexpr bool is_bfp8 = std::is_same<Tgpu, bfloat8_fnuz>::value;
if(is_bfp8 || is_fp8 || TensorsCasted())
tolerance *= 37.0;

{ // tf32 has same mantissa length as fp16
auto math_type_ = inflags.GetValueInt("math_type");
if(std::is_same_v<Tgpu, float> && (miopen::EnvEnableTF32() || math_type_))
tolerance = 8.2e-3;
}
return tolerance;
}

Expand Down Expand Up @@ -868,6 +874,8 @@ int ConvDriver<Tgpu, Tref>::GetandSetData()
warmupConvDesc,
static_cast<int>(miopenConvolutionFindModeNormal)); // Repeat via hidden API.
miopenSetConvolutionGroupCount(warmupConvDesc, group_count);
miopenSetConvolutionAttribute(
warmupConvDesc, MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE, inflags.GetValueInt("math_type"));

int warmup_out_len_size = miopen::deref(warmupInputTensor).GetNumDims();
std::vector<int> warmup_out_len(warmup_out_len_size);
Expand Down Expand Up @@ -1022,6 +1030,8 @@ int ConvDriver<Tgpu, Tref>::AddCmdLineArgs()
"0",
"MIOpen tuning policy (Default=0, or no tuning policy set)",
"int");
// TODO:(LYM) change back to 0
inflags.AddInputFlag("math_type", 'M', "1", "math type of compute (Default=1)", "int");

return 0;
}
Expand Down Expand Up @@ -1226,6 +1236,14 @@ int ConvDriver<Tgpu, Tref>::SetConvDescriptorFromCmdLineArgs()
miopenSetTransposeConvNdOutputPadding(convDesc, spatial_dim, trans_output_pads.data());
}

auto math_type_ = inflags.GetValueInt("math_type");
if(math_type_ < miopenMathDefault || math_type_ > miopenMathPedantic)
{
std::cout << "Invalid math_type value: " << math_type_ << std::endl;
exit(0); // NOLINT (concurrency-mt-unsafe)
}
miopenSetConvolutionAttribute(convDesc, MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE, math_type_);

return miopenStatusSuccess;
}

Expand Down
11 changes: 11 additions & 0 deletions projects/miopen/include/miopen/miopen.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,14 @@ typedef enum
miopenStatusVersionMismatch = 10, /*!< Version mismatch of the supplied binary data argment. */
} miopenStatus_t;

typedef enum
{
// TODO:(LYM) temporary use Pedantic as default until TF32 is fully supported
miopenMathDefault = 0, /*!< Use TF32 if possible */
miopenMathPedantic =
1, /*!< Default MathType. Strict IEEE compliance. Don't allow datatype down conversion. */
} miopenMathType_t;

#ifdef MIOPEN_BETA_API
typedef enum
{
Expand Down Expand Up @@ -639,6 +647,9 @@ typedef enum
#else
// miopenReserved1 = 2,
#endif
// TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE =
3, /*!< refer to miopenMathType_t,default is miopenMathPedantic >*/
} miopenConvolutionAttrib_t;

/*! @ingroup convolutions
Expand Down
25 changes: 22 additions & 3 deletions projects/miopen/src/conv/problem_description.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,14 @@ void ProblemDescription::MakeNetworkConfig(std::string& conf_key) const
ss << 'x' << GetWeightsLayout();
ss << 'x' << GetOutLayout();
}
ss << 'x' << EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
const auto data_type =
EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
ss << 'x' << data_type;

std::ostringstream optional;
if(data_type == "FP32" && UseTF32())
optional << "TF32" << 'x';

if(const auto ct = GetInCastType())
optional << "ci" << GetDataTypeName(*ct);
if(const auto ct = GetWeightsCastType())
Expand Down Expand Up @@ -239,10 +244,12 @@ void ProblemDescription::Serialize(std::ostream& stream) const
stream << sep << GetWeightsLayout();
stream << sep << GetOutLayout();
}
stream << sep << EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
// clang-format on
const auto data_type =
EncodeDataTypesForKey(GetInDataType(), GetWeightsDataType(), GetOutDataType());
stream << sep << data_type;
stream << sep << GetDirectionStr();

// clang-format on
// New performance config entries shall come into variable/optional part of db key.
// This is to support backward compatibility with previous versions of databases.
std::ostringstream optional;
Expand All @@ -257,6 +264,10 @@ void ProblemDescription::Serialize(std::ostream& stream) const
optional << "_cw" << GetDataTypeName(*ct);
if(const auto ct = GetOutCastType())
optional << "_co" << GetDataTypeName(*ct);

// cx indicates compute datatype
if(data_type == "FP32" && UseTF32())
optional << "_cxTF32";
}
if(!optional.str().empty())
{
Expand Down Expand Up @@ -316,6 +327,14 @@ void ProblemDescription::SetupFloats(ExecutionContext& ctx) const
<< "x" << GetDataTypeName(GetOutDataType()));
}

void ProblemDescription::SetupComputeType(const ExecutionContext& ctx) const
{
if(ctx.GetStream().GetDeviceName() == "gfx942" && conv.EnableTF32())
{
use_tf32 = true;
}
Comment thread
yingluAMD marked this conversation as resolved.
}

std::string ProblemDescription::ComputeLayout(const TensorDescriptor& td) const
{
return td.GetLayout_str();
Expand Down
3 changes: 2 additions & 1 deletion projects/miopen/src/conv/solver_finders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,8 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,

MIOPEN_THROW_IF(elapsed <= 0, "Invalid elapsed time detected in EvaluateInvokers");

MIOPEN_LOG_I(sol << ": " << elapsed << (elapsed < best ? " < " : " >= ") << best);
MIOPEN_LOG_I("solution(current vs best):" << sol << ": " << elapsed
<< (elapsed < best ? " < " : " >= ") << best);
if(elapsed < best)
{
best = elapsed;
Expand Down
26 changes: 26 additions & 0 deletions projects/miopen/src/convolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,18 @@ std::size_t ConvolutionDescriptor::GetWorkSpaceSize(ExecutionContext ctx,
return workspace_size;
}

bool ConvolutionDescriptor::EnableTF32() const
{
/* true only when both EnvEnableTF32() and (MathType==Default) are true. */
// temporarily disable TF32 until tf32 feature are fully complete validated with database.
// TODO:(LYM) change back to &&
if((miopen::EnvEnableTF32() ||
(static_cast<miopenMathType_t>(attribute.Get(MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)) ==
miopenMathDefault)))
return true;
return false;
}

std::ostream& operator<<(std::ostream& stream, const ConvolutionDescriptor& c)
{
stream << "conv" << c.spatialDim << "d, ";
Expand Down Expand Up @@ -540,6 +552,18 @@ void ConvolutionAttribute::Set(miopenConvolutionAttrib_t attr, int value)
}
fp8rounding_mode.rounding_mode = rounding_mode;
}
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)
{
const auto math_type_ = static_cast<miopenMathType_t>(value);
if(math_type_ != miopenMathDefault && math_type_ != miopenMathPedantic)
{
MIOPEN_THROW(miopenStatusBadParm,
"[Set conv attribute] Error: Attempt to set invalid value for "
"MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE: " +
std::to_string(value));
}
math_type.value = math_type_;
}
else
{
MIOPEN_THROW(miopenStatusBadParm,
Expand All @@ -556,6 +580,8 @@ int ConvolutionAttribute::Get(miopenConvolutionAttrib_t attr) const
return static_cast<int>(fp8rounding_mode.rounding_mode);
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC)
return deterministic.value;
else if(attr == MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE)
return math_type.value;
MIOPEN_THROW(miopenStatusBadParm,
"[Get conv attribute] Error: Attribute [" +
std::to_string(static_cast<int>(attr)) + "] does not exist.");
Expand Down
3 changes: 3 additions & 0 deletions projects/miopen/src/convolution_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ static inline auto MakeFwdCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand All @@ -86,6 +87,7 @@ static inline auto MakeBwdCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand All @@ -111,6 +113,7 @@ static inline auto MakeWrWCtxAndProblem(miopenHandle_t handle,

auto ctx = ExecutionContext{&miopen::deref(handle)};
problem.SetupFloats(ctx);
problem.SetupComputeType(ctx);
return std::make_tuple(std::move(ctx), std::move(problem));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
return GetInCastType() || GetWeightsCastType() || GetOutCastType();
}

bool UseTF32() const { return use_tf32; }

// To be used in Solvers that do not implement ALT FP16 kernels.
// Those Solvers must be non-applicable for gfx90a when this function returns true.
bool IsGfx90aFp16altRequired() const
Expand Down Expand Up @@ -396,6 +398,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
self.GetInDataType(), self.GetWeightsDataType(), self.GetOutDataType());
f(data_type, "data_type");
f(self.GetDirectionStr(), "direction");
if(data_type == "FP32" && self.UseTF32())
f("TF32", "compute_datatype");
}

template <class Self, class Visitor>
Expand All @@ -407,6 +411,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
}

void SetupFloats(ExecutionContext& ctx) const;
void SetupComputeType(const ExecutionContext& ctx) const;

private:
std::string ComputeLayout(const TensorDescriptor& td) const;
Expand All @@ -426,6 +431,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase
Scalar alpha = Scalar(1.0);
Scalar beta = Scalar(0.0);
miopenAlphaBetaCase_t alpha_beta_case = DEFAULT;
mutable bool use_tf32 = false;
Comment thread
yingluAMD marked this conversation as resolved.
};

} // namespace conv
Expand Down
10 changes: 6 additions & 4 deletions projects/miopen/src/include/miopen/conv/solvers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4524,13 +4524,15 @@ struct PerformanceConfigHipImplicitGemm3DGroupFwdXdlops
MIOPEN_INTERNALS_EXPORT bool IsValid(const miopen::conv::ProblemDescription&) const;
MIOPEN_INTERNALS_EXPORT bool
operator==(const PerformanceConfigHipImplicitGemm3DGroupFwdXdlops& other) const;
bool UseTF32() const { return use_tf32; }

private:
template <typename DataType>
void Init(const miopen::conv::ProblemDescription&);
template <typename DataType>
template <typename DataType, typename ComputeType = DataType>
bool Init(const miopen::conv::ProblemDescription&);
template <typename DataType, typename ComputeType = DataType>
bool CheckIsSupportCKArgs(const miopen::conv::ProblemDescription&) const;
void InitValidKernels(const miopen::conv::ProblemDescription& problem);
mutable bool use_tf32 = false;
Comment thread
BrianHarrisonAMD marked this conversation as resolved.
};

struct ConvHipImplicitGemm3DGroupFwdXdlops final
Expand Down Expand Up @@ -4567,7 +4569,7 @@ struct ConvHipImplicitGemm3DGroupFwdXdlops final
bool MayNeedWorkspace() const override { return true; }

private:
template <typename DataType>
template <typename DataType, typename ComputeType = DataType>
bool CheckCKApplicability(const miopen::conv::ProblemDescription&) const;
};

Expand Down
21 changes: 21 additions & 0 deletions projects/miopen/src/include/miopen/convolution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,15 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_CONVOLUTION_DETERMINISTIC)
MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE)
MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_DEBUG_CONVOLUTION_ATTRIB_FP8_ROUNDING_SEED)

// disable TF32 by default temporarily until we fully complete this feature.
// TODO:(LYM) change back
MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TF32_OVERRIDE, 0);
MIOPEN_DECLARE_ENV_VAR_BOOL(NVIDIA_TF32_OVERRIDE, 0);

namespace miopen {

MIOPEN_INTERNALS_EXPORT bool EnvEnableTF32();

namespace conv {
struct ProblemDescription;
} // namespace conv
Expand Down Expand Up @@ -132,6 +139,17 @@ struct MIOPEN_INTERNALS_EXPORT ConvolutionAttribute
}
} deterministic;

class MathType
{
// temporary set default to pedantic until we fully complete this feature.
// TODO:(LYM) change back
miopenMathType_t value = miopenMathPedantic;
friend struct ConvolutionAttribute;

public:
inline int Get() const { return value; }
} math_type;

/// Tri-state attribute values:
/// * -1: Default (attribute-specific).
/// * 0: Disabled/Yes.
Expand Down Expand Up @@ -350,6 +368,7 @@ struct MIOPEN_INTERNALS_EXPORT ConvolutionDescriptor : miopenConvolutionDescript
Data_t dw,
Data_t workSpace,
std::size_t workSpaceSize) const;
miopenMathType_t GetMathType() const;

std::size_t spatialDim;
miopenConvolutionMode_t mode;
Expand All @@ -373,6 +392,8 @@ struct MIOPEN_INTERNALS_EXPORT ConvolutionDescriptor : miopenConvolutionDescript
std::size_t GetSolutionCountFallback(const ExecutionContext& ctx,
const conv::ProblemDescription& problem) const;

bool EnableTF32() const;

friend void to_json(nlohmann::json& json, const ConvolutionDescriptor& conv);
friend void from_json(const nlohmann::json& json, ConvolutionDescriptor& conv);

Expand Down
1 change: 1 addition & 0 deletions projects/miopen/src/include/miopen/fusion/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct FusionContext : ExecutionContext
{
auto ctx = ExecutionContext{*this};
conv_problem.SetupFloats(ctx);
conv_problem.SetupComputeType(ctx);
return ctx;
}
};
Expand Down
Loading