Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
6115f17
add cutoff time for GenericSearch relative to worst case timing from …
cderb Sep 4, 2025
02a92b9
remove const from execution context to allow message passing between …
cderb Sep 5, 2025
ca3384b
removing const modifiers
cderb Sep 5, 2025
a303d23
use context copy for ExecutePrimitive
cderb Sep 8, 2025
4ab9fd1
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Sep 8, 2025
f8e3c30
whitespace
cderb Sep 9, 2025
bbba8f6
adjust function signatures, corrections for cutoff updates
cderb Sep 9, 2025
233cdcc
clang format
cderb Sep 9, 2025
770f56f
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Sep 9, 2025
1352364
split cutoff time to best_time and worst_time
cderb Sep 10, 2025
2d449a4
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Sep 11, 2025
a953883
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Sep 11, 2025
2362fd6
restore const to ExecutionContext, add mutable to search time fields
cderb Sep 11, 2025
a6ea744
add envs to modify GenericSearch skips
cderb Sep 11, 2025
5c03cc8
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Sep 16, 2025
7f78659
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Oct 6, 2025
a72cef3
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Oct 14, 2025
2f548b2
format
cderb Oct 14, 2025
9ef9664
format
cderb Oct 14, 2025
0507b84
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Oct 14, 2025
179b0d6
adjust tuning policy smoke test
cderb Oct 15, 2025
dc3b189
format
cderb Oct 17, 2025
7e23c0a
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Oct 27, 2025
e981caf
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Nov 3, 2025
950c6aa
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Nov 3, 2025
cc29aa9
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Nov 4, 2025
5121522
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Nov 6, 2025
8795a3a
switch to using env MIOPEN_SEARCH_CUTOFF
cderb Nov 7, 2025
b5ae9ab
Merge remote-tracking branch 'origin/develop' into users/cderb/tuning…
cderb Nov 7, 2025
f6fceac
stop populating perf_sols when unused
cderb Nov 8, 2025
8477e12
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Nov 8, 2025
3891835
Merge branch 'develop' into users/cderb/tuning_cutoff
cderb Nov 10, 2025
829793c
add skip mechanism to find loop
cderb Nov 19, 2025
4cae3cb
order solver finders by performance
cderb Nov 20, 2025
c953669
Merge remote-tracking branch 'origin/develop' into users/cderb/find_c…
cderb Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 40 additions & 19 deletions projects/miopen/src/conv/solver_finders.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY)

MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_FIND_CONV_INSUFFICIENT_WORKSPACE_ALLOW_FINDDB_UPDATE)

MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_SEARCH_CUTOFF, false)
MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_SEARCH_SKIP_PCT, 130)

namespace miopen {

namespace conv {
Expand Down Expand Up @@ -202,11 +205,11 @@ const std::vector<std::unique_ptr<ISolversFinder>>& GetConvSolverFinders()
{
static const auto finders = []() {
auto tmp = std::vector<std::unique_ptr<ISolversFinder>>{};
tmp.emplace_back(std::make_unique<WinogradSolverFinder>());
tmp.emplace_back(std::make_unique<DirectSolverFinder>());
tmp.emplace_back(std::make_unique<ImplicitGemmSolverFinder>());
tmp.emplace_back(std::make_unique<GemmSolverFinder>());
tmp.emplace_back(std::make_unique<WinogradSolverFinder>());
tmp.emplace_back(std::make_unique<FftSolverFinder>());
tmp.emplace_back(std::make_unique<DirectSolverFinder>());
return tmp;
}();

Expand All @@ -221,17 +224,18 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
const AlgorithmName& algorithm_name,
const NetworkConfig& network_config,
const AnyInvokeParams& invoke_ctx,
bool& is_result_optimal,
FindCoreResult& core_result,
bool force_attach_binary)
{
const auto arch = env::value(MIOPEN_DEVICE_ARCH);
if(!arch.empty())
return {};

auto selected = miopen::solver::ConvSolution{miopenStatusUnknownError};
auto best = std::numeric_limits<float>::max();
auto best_invoker = Invoker{};
auto ret = std::vector<Solution>{};
bool using_search_cutoff = env::value(MIOPEN_SEARCH_CUTOFF);
auto selected = miopen::solver::ConvSolution{miopenStatusUnknownError};
auto best = std::numeric_limits<float>::max();
auto best_invoker = Invoker{};
auto ret = std::vector<Solution>{};
std::vector<float> samples;

for(const auto& sol : solutions)
Expand All @@ -250,13 +254,27 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
// That is why we do not write sub-optimal results into persistent find-db (on disk)
// unless this is explicitly enabled via environment setting.
if(!env::enabled(MIOPEN_FIND_CONV_INSUFFICIENT_WORKSPACE_ALLOW_FINDDB_UPDATE))
is_result_optimal = false;
core_result.is_optimal = false;
continue;
}

if(!sol.invoker_factory)
MIOPEN_THROW("Invoker is not provided by solver " + sol.solver_id);

float skip_time = core_result.find_search_best_time;
if(skip_time < std::numeric_limits<float>::max())
{
skip_time *= env::value(MIOPEN_SEARCH_SKIP_PCT) / 100.0f;
// skip Naive if another solver has been timed.
if(using_search_cutoff && sol.solver_id.find("Naive") != std::string::npos)
{
MIOPEN_LOG_I("Skipping Naive Solver: " << algorithm_name.ToString() << ":"
<< sol.solver_id);
continue;
}
}
MIOPEN_LOG_I("Evaluating Solver: " << algorithm_name.ToString() << ":" << sol.solver_id);

std::vector<Program> programs;
const auto invoker = handle.PrepareInvoker(*sol.invoker_factory,
sol.construction_params,
Expand All @@ -272,6 +290,7 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
auto first_elapsed = static_cast<elapsed_t>(0);
int i = 0;
samples.clear();

while(i < N_RUNS_MAX && elapsed < TIME_MS_MAX)
{
invoker(handle, invoke_ctx);
Expand All @@ -280,6 +299,12 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
if(i > 0)
{
samples.push_back(handle.GetKernelTime());
if(i == 1 && using_search_cutoff && samples.front() > skip_time)
{
MIOPEN_LOG_I("Skipping (Slow) Solver: " << algorithm_name.ToString() << ":"
<< sol.solver_id);
break;
}
}
else
{
Expand All @@ -306,9 +331,10 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
<< (elapsed < best ? " < " : " >= ") << best);
if(elapsed < best)
{
best = elapsed;
selected = sol;
best_invoker = invoker;
best = elapsed;
selected = sol;
best_invoker = invoker;
core_result.find_search_best_time = best;
}

auto solution = Solution{solver::Id{sol.solver_id}, elapsed, sol.workspace_sz};
Expand Down Expand Up @@ -345,7 +371,7 @@ FindCoreResult FindCore(const AnyInvokeParams& invoke_ctx,
auto& handle = ctx.GetStream();

// Find
auto solutions = std::map<AlgorithmName, std::vector<solver::ConvSolution>>{};
auto solutions = std::vector<std::pair<AlgorithmName, std::vector<solver::ConvSolution>>>{};
std::transform(
finders.begin(), finders.end(), std::inserter(solutions, solutions.end()), [&](auto&& f) {
return std::make_pair(f->GetAlgorithmName(problem),
Expand Down Expand Up @@ -393,13 +419,8 @@ FindCoreResult FindCore(const AnyInvokeParams& invoke_ctx,

for(const auto& ss : solutions)
{
auto evaluated = EvaluateInvokers(handle,
ss.second,
ss.first,
network_config,
invoke_ctx,
ret.is_optimal,
force_attach_binary);
auto evaluated = EvaluateInvokers(
handle, ss.second, ss.first, network_config, invoke_ctx, ret, force_attach_binary);

ret.solutions.insert(ret.solutions.end(),
std::make_move_iterator(evaluated.begin()),
Expand Down
15 changes: 8 additions & 7 deletions projects/miopen/src/include/miopen/any_solver.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ struct AnySolver
std::string GenericSearch(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem,
const miopen::AnyInvokeParams& invoke_ctx,
std::vector<miopen::solver::SolutionPerf>* perf_sols = nullptr) const
std::vector<miopen::solver::SolutionPerf>* perf_solsp = nullptr) const
{
assert(ptr_value != nullptr);
return ptr_value->GenericSearch(ctx, problem, invoke_ctx, perf_sols);
return ptr_value->GenericSearch(ctx, problem, invoke_ctx, perf_solsp);
}

InvokerFactory GetInvokeFactory(const ExecutionContext& ctx,
Expand Down Expand Up @@ -213,7 +213,7 @@ struct AnySolver
GenericSearch(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem,
const miopen::AnyInvokeParams& invoke_ctx,
std::vector<miopen::solver::SolutionPerf>* perf_sols) const = 0;
std::vector<miopen::solver::SolutionPerf>* perf_solsp) const = 0;
virtual InvokerFactory GetInvokeFactory(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem,
const std::string& perf_cfg) const = 0;
Expand Down Expand Up @@ -457,11 +457,12 @@ struct AnySolver
std::string GenericSearch(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem,
const miopen::AnyInvokeParams& invoke_ctx,
std::vector<miopen::solver::SolutionPerf>* perf_sols,
std::vector<miopen::solver::SolutionPerf>* perf_solsp,
std::true_type,
std::false_type) const
{
auto config = miopen::solver::GenericSearch(value, ctx, problem, invoke_ctx, perf_sols);
auto config =
miopen::solver::GenericSearch(value, ctx, problem, invoke_ctx, perf_solsp);
return config.ToString();
}

Expand Down Expand Up @@ -490,12 +491,12 @@ struct AnySolver
GenericSearch(const ExecutionContext& ctx,
const miopen::conv::ProblemDescription& problem,
const miopen::AnyInvokeParams& invoke_ctx,
std::vector<miopen::solver::SolutionPerf>* perf_sols) const override
std::vector<miopen::solver::SolutionPerf>* perf_solsp) const override
{
return GenericSearch(ctx,
problem,
invoke_ctx,
perf_sols,
perf_solsp,
std::integral_constant<bool, TunableSolver::Is>(),
std::integral_constant<bool, LegacySolver::Is>());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ const std::vector<std::unique_ptr<ISolversFinder>>& GetConvSolverFinders();
/// Result of a find pass: the evaluated solutions plus bookkeeping that is
/// threaded through successive EvaluateInvokers() calls.
struct FindCoreResult
{
    /// Solutions gathered so far.
    std::vector<Solution> solutions;

    /// Fastest time recorded by EvaluateInvokers() so far. The initial value of
    /// numeric_limits<float>::max() means "nothing has been timed yet".
    float find_search_best_time = std::numeric_limits<float>::max();

    /// Cleared by EvaluateInvokers() when a result is known to be sub-optimal.
    /// Initialized explicitly so that a default-constructed object never holds
    /// an indeterminate value; callers that need `true` set it themselves.
    bool is_optimal = false;
};

Expand All @@ -168,7 +169,7 @@ std::vector<Solution> EvaluateInvokers(const Handle& handle,
const AlgorithmName& algorithm_name,
const NetworkConfig& network_config,
const AnyInvokeParams& invoke_ctx,
bool& is_result_optimal,
FindCoreResult& core_result,
bool force_attach_binary);

FindCoreResult FindCore(const AnyInvokeParams& invoke_ctx,
Expand Down
8 changes: 5 additions & 3 deletions projects/miopen/src/include/miopen/execution_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,11 @@ struct MIOPEN_INTERNALS_EXPORT ExecutionContext
// to optimize the getWorkspaceSize() calls for speed. This specific optimization is correct
// because Solvers shall be written so that the required workspace size does not depend on the
// performance config.
bool disable_perfdb_access = false;
bool use_dynamic_solutions_only = false;
bool is_for_generic_search = false;
bool disable_perfdb_access = false;
bool use_dynamic_solutions_only = false;
bool is_for_generic_search = false;
mutable float generic_search_worst_time = std::numeric_limits<float>::max();
mutable float generic_search_best_time = std::numeric_limits<float>::max();

inline const Handle& GetStream() const { return *stream; }
inline void SetStream(const Handle* stream_) { stream = stream_; }
Expand Down
57 changes: 43 additions & 14 deletions projects/miopen/src/include/miopen/generic_search.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ auto GenericSearch(const Solver s,
const Context& context_,
const Problem& problem,
const AnyInvokeParams& invoke_ctx_,
std::vector<SolutionPerf>* perf_sols = nullptr)
std::vector<SolutionPerf>* perf_solsp = nullptr)
-> decltype(s.GetDefaultPerformanceConfig(context_, problem))
{
auto context = context_;
Expand All @@ -443,10 +443,7 @@ auto GenericSearch(const Solver s,
}();

// list of sampled solutions
if(perf_sols)
{
perf_sols->erase(perf_sols->begin(), perf_sols->end());
}
std::vector<SolutionPerf> perf_sols;

auto& profile_h = context.GetStream();
const AutoEnableProfiling enableProfiling{profile_h};
Expand Down Expand Up @@ -485,6 +482,19 @@ auto GenericSearch(const Solver s,
float worst_time = std::numeric_limits<float>::max();
size_t n_failed = 0;
size_t n_best = 0;
// enable early search termination
bool using_search_cutoff = env::value(MIOPEN_SEARCH_CUTOFF);
// terminate the search once a measured time exceeds the cutoff
float cutoff_time = context.generic_search_worst_time;
if(cutoff_time < std::numeric_limits<float>::max())
cutoff_time *= env::value(MIOPEN_SEARCH_CUTOFF_MUL);
// skip detailed measurement for configs slower than skip_time
float skip_time = context.generic_search_best_time;
if(skip_time < std::numeric_limits<float>::max())
skip_time *= env::value(MIOPEN_SEARCH_SKIP_PCT) / 100.0f;

bool rec_results = perf_solsp || using_search_cutoff;

HeartBeat<PerformanceConfig> heartbeat;
heartbeat.Start();

Expand Down Expand Up @@ -583,13 +593,23 @@ auto GenericSearch(const Solver s,

if(ret == 0)
{
// If config is worse than the cutoff time abort the search
if(elapsed_time > cutoff_time)
{
MIOPEN_LOG_I2("Ending Search, measured time: "
<< elapsed_time << " was greater than cutoff: " << cutoff_time);
for(const auto& kernelInfo : current_solution.construction_params)
profile_h.ClearProgram(kernelInfo.kernel_file, kernelInfo.comp_options);
break;
}

// Smooth the jitter of measurements:
// If the 1st probe is NOT too bad (measured time < 1.10 * worst sample of the best
// config, and also below the skip threshold), then gather 9 more samples, and remove
// positive z-score outliers. Use the mean value with outliers removed for calculating
// best config.
constexpr int N_RUNS = 10;
last_imprv++;
if(elapsed_time / worst_time < 1.10f)
if(elapsed_time < worst_time * 1.10f && elapsed_time < skip_time)
{
MIOPEN_LOG_I2("Finding average for: " << elapsed_time << " / " << best_time
<< " = " << (elapsed_time / best_time));
Expand Down Expand Up @@ -636,10 +656,8 @@ auto GenericSearch(const Solver s,
}
}
}
if(perf_sols)
{
perf_sols->push_back({current_config.ToString(), elapsed_time});
}
if(rec_results)
perf_sols.push_back({current_config.ToString(), elapsed_time});
}

// Benchmarked kernels will not be used anymore.
Expand Down Expand Up @@ -679,10 +697,21 @@ auto GenericSearch(const Solver s,
if(!is_passed)
MIOPEN_THROW("Search failed");

if(perf_sols)
std::sort(perf_sols->begin(), perf_sols->end(), [](SolutionPerf a, SolutionPerf b) {
return a.time < b.time;
});
std::sort(perf_sols.begin(), perf_sols.end(), [](SolutionPerf a, SolutionPerf b) {
return a.time < b.time;
});

// if using cutoff for search update timing
if(using_search_cutoff == true && best_time < context.generic_search_best_time)
{
float new_worst = (perf_sols.end() - 1)->time;
context_.generic_search_best_time = best_time;
context_.generic_search_worst_time = new_worst;
MIOPEN_LOG_I2("Times updated, best: " << best_time << " worst: " << new_worst);
}

if(perf_solsp)
*perf_solsp = std::move(perf_sols);

// Run once with the default config and show score.
const auto& invoker = profile_h.PrepareInvoker(*default_solution.invoker_factory,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,9 @@ MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_DEBUG_COMPILE_ONLY)

const size_t MIOPEN_DEFAULT_VERIFY_TOLERANCE_PCT = 15;
MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_VERIFY_TOLERANCE_PCT, MIOPEN_DEFAULT_VERIFY_TOLERANCE_PCT)

MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_SEARCH_CUTOFF, false)

MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_SEARCH_CUTOFF_MUL, 10)

MIOPEN_DECLARE_ENV_VAR_UINT64(MIOPEN_SEARCH_SKIP_PCT, 130)
23 changes: 15 additions & 8 deletions projects/miopen/src/ocl/convolutionocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,8 @@ std::vector<Solution> EvaluateConvSolutions(const ExecutionContext& ctx,
// test timing of solver reported by system db
const auto& handle = ctx.GetStream();
AutoEnableProfiling enableProfiling{handle};
bool is_optimal = true;
FindCoreResult core_result;
core_result.is_optimal = true;

// reverse solutions so that EvaluateInvokers registers the fastest solution last
auto sol_itr = solutions.rbegin();
Expand All @@ -308,7 +309,7 @@ std::vector<Solution> EvaluateConvSolutions(const ExecutionContext& ctx,
AlgorithmName algo{
ConvolutionAlgoToDirectionalString(id.GetAlgo(), problem.GetDirection())};
std::vector<Solution> eval_sol = EvaluateInvokers(
handle, conv_sols, algo, problem.MakeNetworkConfig(), invoke_ctx, is_optimal, false);
handle, conv_sols, algo, problem.MakeNetworkConfig(), invoke_ctx, core_result, false);

if(!eval_sol.empty())
eval_sols.emplace_back(eval_sol.front());
Expand Down Expand Up @@ -377,16 +378,16 @@ std::vector<Solution> VerifiedFDBSolution(const ExecutionContext& ctx,
// system db result is good
// add to user fdb so this check is skipped next time
MIOPEN_LOG_I2("TrustVerify: Add system db entry to user db");
auto fallback = FallbackPath();
auto ret = FindCoreResult();
ret.is_optimal = true;
auto copy_sols = conv.GetSolutions(ctx, problem, 4, &fallback, &invoke_ctx);
auto fallback = FallbackPath();
auto core_result = FindCoreResult();
core_result.is_optimal = true;
auto copy_sols = conv.GetSolutions(ctx, problem, 4, &fallback, &invoke_ctx);
for(const auto& s : copy_sols)
{
auto solution = Solution{solver::Id{s.solution_id}, s.time, s.workspace_size};
ret.solutions.emplace_back(std::move(solution));
core_result.solutions.emplace_back(std::move(solution));
}
return ret;
return core_result;
}
else
{
Expand Down Expand Up @@ -414,6 +415,9 @@ std::vector<Solution> VerifiedFDBSolution(const ExecutionContext& ctx,
else
MIOPEN_LOG_I("Find Ended: " << record.GetKey());

ctx.generic_search_worst_time = ctx_copy.generic_search_worst_time;
ctx.generic_search_best_time = ctx_copy.generic_search_best_time;

return ret;
}
});
Expand Down Expand Up @@ -521,6 +525,9 @@ std::vector<Solution> FindConvolution(const ExecutionContext& ctx,
else
MIOPEN_LOG_I("Find Ended: " << record.GetKey());

ctx.generic_search_worst_time = ctx_copy.generic_search_worst_time;
ctx.generic_search_best_time = ctx_copy.generic_search_best_time;

return ret;
});
}
Expand Down