Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
49a0bbb
Added first attempt at fdeep versions of 3d ktn models.
amd-bartgips Jul 25, 2025
d015559
Added co-pilot generated new model class for selecting the best candi…
amd-bartgips Jul 25, 2025
289a1f2
performed dot product directly in cpp code, removed candidate_selecto…
amd-bartgips Jul 25, 2025
2c155cc
Added the machinery to use the new candidate selection heuristics for…
amd-bartgips Jul 25, 2025
10722a3
added split_k functionality
amd-bartgips Jul 25, 2025
fe19eb9
refactored by moving some helper functions outside the main function
amd-bartgips Jul 25, 2025
2e000f3
improved loading of metadata for candidate selection model
amd-bartgips Jul 28, 2025
dbf021b
split off own metadata class to keep it distinct from legacy version
amd-bartgips Jul 28, 2025
11ec8d5
Added new versions of CS models (+new metadata)
amd-bartgips Jul 28, 2025
05871e3
Added methods for preprocessing input and kernel_configs for CS model…
amd-bartgips Jul 28, 2025
b007d9e
fixed naming of CandidateSelectionMetadata and Ptrs variables
amd-bartgips Jul 28, 2025
7574ca7
Added new model and metadata class to header file
amd-bartgips Jul 28, 2025
e795096
removed unused function
amd-bartgips Jul 28, 2025
c0a95a8
removed duplicate definition of CandidateSelectionMetadata
amd-bartgips Jul 28, 2025
b89cae2
* altered SelectBestCandidate to avoid errors.
amd-bartgips Jul 28, 2025
b09b18f
removed unused function
amd-bartgips Jul 28, 2025
cb846ec
Build now works.
amd-bartgips Jul 28, 2025
efeece1
moved new candidateSelection code to its own files (and sub namespace)
amd-bartgips Jul 29, 2025
33244fc
refactored general 3D conv kernel tuning functions into their own fil…
amd-bartgips Jul 29, 2025
25b25a8
Cleaned up unused include
amd-bartgips Jul 29, 2025
279b87a
removed superfluous includes
amd-bartgips Jul 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
411 changes: 411 additions & 0 deletions src/conv/heuristics/ai_candidate_selection.cpp

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/conv/heuristics/ai_heuristics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@
*
*******************************************************************************/

// This file implements AI-based heuristics for convolution solver and kernel tuning.
// Sections:
// 1. Includes
// 2. Common utilities
// 3. Immediate mode AI models
// 4. Kernel tuning AI models (sequential prediction of kernel parameters)

#include <miopen/conv/heuristics/ai_heuristics.hpp>
#if MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK || MIOPEN_ENABLE_AI_KERNEL_TUNING
#include <fdeep/fdeep.hpp>
Expand Down
185 changes: 185 additions & 0 deletions src/conv/heuristics/ai_kernel_tuning_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************
* General AI-related helpers for kernel tuning and heuristics, intended to be
* called from solver-specific code.
*******************************************************************************/
#include <miopen/conv/heuristics/ai_kernel_tuning_utils.hpp>
#include <sstream>
#include <algorithm>
#include <miopen/conv/heuristics/ai_candidate_selection.hpp>
#include <miopen/logger.hpp>

#if MIOPEN_ENABLE_AI_KERNEL_TUNING
namespace miopen {
namespace solver {
namespace conv {

// Helper: Extract 3D convolution features
std::vector<float>
GetFeatures3D(const ProblemDescription& problem, int max_cu, const std::string& arch)
{
std::vector<float> features;
features.push_back(static_cast<float>(ProblemInterpreter::GetBatchN(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputChannelC(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetOutputChannelK(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetGroupCountG(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputDepthDi(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputHeightHi(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputWidthWi(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetOutputDepthDo(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetOutputHeightHo(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetOutputWidthWo(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetFilterDepthZ(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetFilterHeightY(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetFilterWidthX(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionStrideD(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionStrideH(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionStrideW(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionDilationD(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionDilationH(problem)));
features.push_back(
static_cast<float>(ProblemInterpreter::GetAdjustedConvolutionDilationW(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputLeftPadD(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputLeftPadH(problem)));
features.push_back(static_cast<float>(ProblemInterpreter::GetInputLeftPadW(problem)));
features.push_back(static_cast<float>(max_cu));
features.push_back(static_cast<float>(problem.GetInDataType()));
features.push_back(problem.IsLayoutNHWC() ? 1.0f : 0.0f);
return features;
}

// Helper: Tokenize kernel string
//
// Splits `kernel` on '_' and returns the non-empty pieces in their original
// order. Consecutive, leading and trailing underscores produce no tokens, so
// an empty or all-underscore input yields an empty vector.
std::vector<std::string> TokenizeKernel(const std::string& kernel)
{
    std::vector<std::string> parts;
    std::string::size_type begin = 0;
    while(begin <= kernel.size())
    {
        // End of the current piece: the next underscore, or the end of input.
        const std::string::size_type underscore = kernel.find('_', begin);
        const std::string::size_type stop =
            (underscore == std::string::npos) ? kernel.size() : underscore;
        if(stop > begin)
            parts.emplace_back(kernel, begin, stop - begin);
        begin = stop + 1;
    }
    return parts;
}

// Helper: Filter kernels by type and collect indexes/tokens
//
// Tokenizes every entry of `valid_kernels` with TokenizeKernel and keeps the
// ones whose first token equals `type`.
//
// type          - required value of the first token.
// valid_kernels - kernel strings to inspect.
// indexes       - output; cleared, then filled with the position in
//                 `valid_kernels` of each kept kernel.
// kernels       - output; cleared, then filled with the token list of each
//                 kept kernel, parallel to `indexes`.
void FilterHeuristicKernels(const std::string& type,
                            const std::vector<std::string>& valid_kernels,
                            std::vector<int>& indexes,
                            std::vector<std::vector<std::string>>& kernels)
{
    indexes.clear();
    kernels.clear();
    for(std::size_t i = 0; i < valid_kernels.size(); ++i)
    {
        auto tokens = TokenizeKernel(valid_kernels[i]);
        if(tokens.empty() || tokens[0] != type)
            continue;

        // The output container stores int positions; make the narrowing explicit.
        indexes.push_back(static_cast<int>(i));

        // Hand the tokens over instead of copying every string again.
        kernels.emplace_back();
        kernels.back().swap(tokens);
    }
}

// Helper: Generate split_k values (powers of two)
//
// Returns 1, 2, 4, ... up to and including the largest power of two that is
// <= max_split_k. Returns an empty vector when max_split_k < 1.
std::vector<int> GenerateSplitK(int max_split_k)
{
    std::vector<int> split_ks;
    for(int k = 1; k <= max_split_k; k *= 2)
    {
        split_ks.push_back(k);
        // Stop before doubling once the next value would exceed the bound.
        // Without this, a bound above INT_MAX / 2 makes `k *= 2` overflow a
        // signed int, which is undefined behaviour.
        if(k > max_split_k / 2)
            break;
    }
    return split_ks;
}

// Helper: Expand kernel params with split_k and keep mapping
//
// For every kernel token list and every split_k value, emits one candidate
// made of the kernel's tokens followed by the split_k value as a string.
//
// kernels  - token lists, one per kernel.
// indexes  - position associated with each entry of `kernels`; expected to
//            be the same length as `kernels`. If the lengths differ, only the
//            common prefix is expanded rather than reading out of bounds.
// split_ks - split_k values to combine with every kernel.
//
// Returns {candidates, mapping}, where mapping[j] is the
// (indexes[i], split_k) pair that produced candidates[j]. Candidates are
// ordered kernel-major, split_k-minor.
std::pair<std::vector<std::vector<std::string>>, std::vector<std::pair<int, int>>>
ExpandKernelParamsWithSplitK(const std::vector<std::vector<std::string>>& kernels,
                             const std::vector<int>& indexes,
                             const std::vector<int>& split_ks)
{
    // Fill the returned pair in place so neither vector is copied on return.
    std::pair<std::vector<std::vector<std::string>>, std::vector<std::pair<int, int>>> result;
    auto& expanded = result.first;
    auto& mapping  = result.second;

    const size_t kernel_count =
        kernels.size() < indexes.size() ? kernels.size() : indexes.size();
    expanded.reserve(kernel_count * split_ks.size());
    mapping.reserve(kernel_count * split_ks.size());

    for(size_t i = 0; i < kernel_count; ++i)
    {
        for(const int split_k : split_ks)
        {
            // Copy the kernel tokens once, then append split_k to that copy.
            expanded.push_back(kernels[i]);
            expanded.back().push_back(std::to_string(split_k));
            mapping.emplace_back(indexes[i], split_k);
        }
    }
    return result;
}

// RunParameterPredictionModel is defined as a function template in
// <miopen/conv/heuristics/ai_kernel_tuning_utils.hpp>; this translation unit
// only provides explicit instantiations of it.

// Explicit template instantiations for common types
// (float, ck::half_t, int8_t and ck::bhalf_t). All four share the same
// parameter list; only the DataType template argument differs.
template bool RunParameterPredictionModel<float>(
const ExecutionContext&,
const ProblemDescription&,
std::vector<std::string>&,
int&,
int&,
std::string&,
std::function<std::vector<std::string>(const ProblemDescription&)>,
std::string);
template bool RunParameterPredictionModel<ck::half_t>(
const ExecutionContext&,
const ProblemDescription&,
std::vector<std::string>&,
int&,
int&,
std::string&,
std::function<std::vector<std::string>(const ProblemDescription&)>,
std::string);
template bool RunParameterPredictionModel<int8_t>(
const ExecutionContext&,
const ProblemDescription&,
std::vector<std::string>&,
int&,
int&,
std::string&,
std::function<std::vector<std::string>(const ProblemDescription&)>,
std::string);
template bool RunParameterPredictionModel<ck::bhalf_t>(
const ExecutionContext&,
const ProblemDescription&,
std::vector<std::string>&,
int&,
int&,
std::string&,
std::function<std::vector<std::string>(const ProblemDescription&)>,
std::string);

} // namespace conv
} // namespace solver
} // namespace miopen
#endif // MIOPEN_ENABLE_AI_KERNEL_TUNING
76 changes: 76 additions & 0 deletions src/include/miopen/conv/heuristics/ai_candidate_selection.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/
#pragma once

#include <vector>
#include <string>
#include <memory>
#include <optional>

namespace miopen {
namespace ai {
namespace tuning {
namespace candidate_selection {

// Metadata accompanying a candidate-selection model, constructed from an
// architecture name and a solver name.
//
// Only declarations are visible in this header.
// NOTE(review): the per-member notes below are inferred from the member names
// and signatures - confirm against the implementation file.
class CandidateSelectionMetadata
{
public:
// Builds the metadata for the given architecture / solver pair.
CandidateSelectionMetadata(const std::string& arch, const std::string& solver);
// Presumably the position of `name` within input_params - TODO confirm,
// including what happens when `name` is not present.
size_t GetInputParamIndex(const std::string& name) const;
// Presumably the position of `name` within output_params - TODO confirm.
size_t GetOutputParamIndex(const std::string& name) const;
// Returns an optional string for the named input parameter; presumably empty
// when the parameter has no constant value - TODO confirm.
std::optional<std::string> GetInputConstant(const std::string& name) const;
// Output-side counterpart of GetInputConstant.
std::optional<std::string> GetOutputConstant(const std::string& name) const;
// Presumably the indices of input parameters that have a constant value -
// TODO confirm.
std::vector<size_t> GetConstantInputIndices() const;
// Output-side counterpart of GetConstantInputIndices.
std::vector<size_t> GetConstantOutputIndices() const;
// Publicly accessible parameter name lists.
std::vector<std::string> input_params;
std::vector<std::string> output_params;
};

// Candidate-selection model for one architecture / solver pair. It exposes its
// CandidateSelectionMetadata as a public member.
//
// NOTE(review): a destructor is declared here but no copy/move operations are;
// check the implementation to confirm the intended copy/move semantics.
class CandidateSelectionModel
{
public:
// Metadata associated with this model.
CandidateSelectionMetadata metadata;
// Constructs the model for the given architecture / solver pair.
CandidateSelectionModel(const std::string& arch, const std::string& solver);
// User-declared destructor; defined out of line.
~CandidateSelectionModel();
// ...add other public methods as needed...
};

// Returns a shared pointer to the CandidateSelectionModel for the given
// architecture / solver pair.
// NOTE(review): whether instances are cached and shared between calls is not
// visible from this declaration - confirm in the implementation.
std::shared_ptr<CandidateSelectionModel> GetCandidateSelectionModel(const std::string& arch,
const std::string& solver);

// Converts kernel parameter token lists into float vectors using `metadata`.
// NOTE(review): presumably one output row per entry of `valid_kernel_params`;
// the encoding itself is defined in the implementation file - confirm there.
std::vector<std::vector<float>>
EncodeKernelParams(const std::vector<std::vector<std::string>>& valid_kernel_params,
const CandidateSelectionMetadata& metadata);

// Selects one candidate from `valid_kernel_params` for the given architecture,
// solver and problem features, and returns it as an int.
//
// The caller in ai_kernel_tuning_utils.hpp uses the result as an index into the
// candidate list and treats a negative or out-of-range value as "no valid
// prediction". It also catches miopen::Exception around this call.
int ModelSelectBestCandidate(const std::string& arch,
const std::string& solver,
const std::vector<float>& features,
const std::vector<std::vector<std::string>>& valid_kernel_params);

} // namespace candidate_selection
} // namespace tuning
} // namespace ai
} // namespace miopen
109 changes: 109 additions & 0 deletions src/include/miopen/conv/heuristics/ai_kernel_tuning_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*******************************************************************************
*
* MIT License
*
* Copyright (c) 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*******************************************************************************/

#pragma once

#include <vector>
#include <string>
#include <miopen/conv/problem_description.hpp>
#include <miopen/conv/heuristics/ai_heuristics.hpp>
#include <miopen/conv/heuristics/ai_candidate_selection.hpp>
#include <miopen/execution_context.hpp>
#include <miopen/solver/implicitgemm_ck_util.hpp>
#include <miopen/solver/implicitgemm_util.hpp>

namespace miopen {
namespace solver {
namespace conv {

// Builds the float feature vector describing a 3D convolution problem:
// tensor and filter extents, strides, dilations, left pads, `max_cu`, the
// input data type and an NHWC-layout flag.
std::vector<float>
GetFeatures3D(const miopen::conv::ProblemDescription&, int max_cu, const std::string& arch);
// Splits `kernel` on '_' and returns the non-empty tokens in order.
std::vector<std::string> TokenizeKernel(const std::string& kernel);
// Keeps the entries of `valid_kernels` whose first '_'-separated token equals
// `type`. `indexes` and `kernels` are cleared, then filled in parallel with
// each kept kernel's position in `valid_kernels` and its token list.
void FilterHeuristicKernels(const std::string& type,
const std::vector<std::string>& valid_kernels,
std::vector<int>& indexes,
std::vector<std::vector<std::string>>& kernels);
// Returns the powers of two 1, 2, 4, ... that are <= max_split_k
// (empty when max_split_k < 1).
std::vector<int> GenerateSplitK(int max_split_k);
// Produces one candidate per (kernel, split_k) combination: the kernel's
// tokens with the split_k value appended as a string. The second element of
// the result maps each candidate back to its (indexes[i], split_k) pair.
// `indexes` is expected to be parallel to `kernels`.
std::pair<std::vector<std::vector<std::string>>, std::vector<std::pair<int, int>>>
ExpandKernelParamsWithSplitK(const std::vector<std::vector<std::string>>& kernels,
const std::vector<int>& indexes,
const std::vector<int>& split_ks);

// Picks a kernel and split_k value for `problem` using the AI
// candidate-selection model.
//
// Steps:
//   1. `valid_kernels` is overwritten with fill_valid_kernels(problem).
//   2. Kernels whose first '_'-separated token is "DeviceGroupedConvBwdWeight"
//      are kept as candidates.
//   3. Each candidate is combined with every power-of-two split_k up to 128.
//   4. ModelSelectBestCandidate picks one of the expanded candidates.
//
// DataType           - not referenced in the body; it only distinguishes the
//                      explicit instantiations.
// ctx                - supplies the device name and the compute-unit count.
// problem            - convolution problem to tune for.
// valid_kernels      - output; always overwritten (step 1).
// index              - output; position of the chosen kernel in
//                      `valid_kernels`. Written only on success.
// split_k            - output; chosen split_k value. Written only on success.
// kernel_id          - output; "<kernel>+<split_k>". Written only on success.
// fill_valid_kernels - callback producing the valid kernel strings.
// solver_name        - solver name forwarded to the model lookup.
//
// Returns true when a valid candidate was selected. Returns false when no
// kernel passes the filter, when the model returns an out-of-range index, or
// when the model call throws miopen::Exception. Other exception types are not
// caught here.
//
// NOTE(review): the kernel-type filter string is hard-coded although the
// function is otherwise solver-agnostic - confirm this is intended.
template <typename DataType>
bool RunParameterPredictionModel(
    const miopen::ExecutionContext& ctx,
    const miopen::conv::ProblemDescription& problem,
    std::vector<std::string>& valid_kernels,
    int& index,
    int& split_k,
    std::string& kernel_id,
    std::function<std::vector<std::string>(const miopen::conv::ProblemDescription&)>
        fill_valid_kernels,
    std::string solver_name)
{
    valid_kernels = fill_valid_kernels(problem);

    // Filter kernels by type
    std::vector<int> heuristic_indexes;
    std::vector<std::vector<std::string>> heuristic_kernels;
    FilterHeuristicKernels(
        "DeviceGroupedConvBwdWeight", valid_kernels, heuristic_indexes, heuristic_kernels);

    // With no candidates the model cannot return a usable index, so skip
    // feature extraction and inference entirely.
    if(heuristic_kernels.empty())
    {
        MIOPEN_LOG_I("No candidate kernels for AI prediction, falling back");
        return false;
    }

    // Prepare features and split_k values
    const std::string& arch = ctx.GetStream().GetDeviceName();
    std::vector<float> features =
        GetFeatures3D(problem, ctx.GetStream().GetMaxComputeUnits(), arch);
    std::vector<int> split_ks = GenerateSplitK(128); // TODO: make configurable

    // Expand kernel params with split_k and keep mapping
    auto [expanded_params, mapping_pairs] =
        ExpandKernelParamsWithSplitK(heuristic_kernels, heuristic_indexes, split_ks);

    // Use AI model to select best candidate
    try
    {
        int best_idx = ai::tuning::candidate_selection::ModelSelectBestCandidate(
            arch, solver_name, features, expanded_params);

        if(best_idx >= 0 && best_idx < static_cast<int>(mapping_pairs.size()))
        {
            // Translate the expanded-candidate index back to the original
            // kernel position and its split_k value.
            index     = mapping_pairs[best_idx].first;
            split_k   = mapping_pairs[best_idx].second;
            kernel_id = valid_kernels[index] + "+" + std::to_string(split_k);
            return true;
        }
        MIOPEN_LOG_I("AI prediction returned invalid kernel index, falling back");
        return false;
    }
    catch(const miopen::Exception& ex)
    {
        MIOPEN_LOG_I2("[Warning] AI model failed: " << ex.what());
        return false;
    }
}
} // namespace conv
} // namespace solver
} // namespace miopen

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading