Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 34 additions & 9 deletions projects/miopen/src/include/miopen/generic_search.hpp
Original file line number Diff line number Diff line change
@@ -560,10 +560,23 @@ auto GenericSearch(const Solver s,
invoker(profile_h, invoke_ctx);
profile_h.ResetKernelTime();

// Run 2 initial tests and take the minimum to reduce noise
// (Based on 100-run stability analysis: 1st sample CV=11.9%, 2nd CV=3.1%)
float initial_time_1 = 0.0f;
float initial_time_2 = 0.0f;

invoker(profile_h, invoke_ctx);
initial_time_1 = profile_h.GetKernelTime();
profile_h.ResetKernelTime();

invoker(profile_h, invoke_ctx);
elapsed_time = profile_h.GetKernelTime();
samples.push_back(elapsed_time);
initial_time_2 = profile_h.GetKernelTime();
profile_h.ResetKernelTime();

// Use minimum of the two initial tests for early-stop threshold check
elapsed_time = std::min(initial_time_1, initial_time_2);
samples.push_back(initial_time_1);
samples.push_back(initial_time_2);
}
catch(const std::exception& e)
{
@@ -584,19 +597,24 @@ auto GenericSearch(const Solver s,
if(ret == 0)
{
// Smooth the jitter of measurements:
// If the 1st probe is NOT too bad (measured time <= 1.10 * worst sample of the best
// config), then gather 9 more samples, and remove positive z-score outliers. Use
// the mean value with outliers removed for calculating best config.
// Early-stop strategy: If the minimum of 2 initial tests is within 1.2x of the
// worst sample of the best config, continue with 8 more samples (total 10).
// The 1.2x threshold (vs original 1.1x) accounts for initial test variance.
// Remove positive z-score outliers and use the mean for calculating best config.
constexpr int N_RUNS = 10;
constexpr float EARLY_STOP_THRESHOLD = 1.20f;
last_imprv++;
if(elapsed_time / worst_time < 1.10f)
if(elapsed_time / worst_time < EARLY_STOP_THRESHOLD)
{
MIOPEN_LOG_I2("Finding average for: " << elapsed_time << " / " << best_time
<< " = " << (elapsed_time / best_time));
MIOPEN_LOG_I2("Initial test passed (" << elapsed_time << " / " << worst_time
<< " = " << (elapsed_time / worst_time)
<< " < " << EARLY_STOP_THRESHOLD
<< "), continuing with 8 more samples");

try
{
for(int i = 1; i < N_RUNS; ++i)
// Continue with 8 more samples (we already have 2 initial samples)
for(int i = 2; i < N_RUNS; ++i)
{
invoker(profile_h, invoke_ctx);
samples.push_back(profile_h.GetKernelTime());
@@ -637,6 +655,13 @@ auto GenericSearch(const Solver s,

}
}
else
{
MIOPEN_LOG_I2("Configuration discarded by early-stop: " << elapsed_time << " / "
<< worst_time << " = "
<< (elapsed_time / worst_time)
<< " >= " << EARLY_STOP_THRESHOLD);
}
if(perf_sols)
{
perf_sols->push_back({current_config.ToString(), elapsed_time});
Expand Down