diff --git a/projects/miopen/src/include/miopen/generic_search.hpp b/projects/miopen/src/include/miopen/generic_search.hpp index 9a6b78da573..ef4e6a9c171 100644 --- a/projects/miopen/src/include/miopen/generic_search.hpp +++ b/projects/miopen/src/include/miopen/generic_search.hpp @@ -560,10 +560,23 @@ auto GenericSearch(const Solver s, invoker(profile_h, invoke_ctx); profile_h.ResetKernelTime(); + // Run 2 initial tests and take the minimum to reduce noise + // (Based on 100-run stability analysis: 1st sample CV=11.9%, 2nd CV=3.1%) + float initial_time_1 = 0.0f; + float initial_time_2 = 0.0f; + + invoker(profile_h, invoke_ctx); + initial_time_1 = profile_h.GetKernelTime(); + profile_h.ResetKernelTime(); + invoker(profile_h, invoke_ctx); - elapsed_time = profile_h.GetKernelTime(); - samples.push_back(elapsed_time); + initial_time_2 = profile_h.GetKernelTime(); profile_h.ResetKernelTime(); + + // Use minimum of the two initial tests for early-stop threshold check + elapsed_time = std::min(initial_time_1, initial_time_2); + samples.push_back(initial_time_1); + samples.push_back(initial_time_2); } catch(const std::exception& e) { @@ -584,19 +597,24 @@ auto GenericSearch(const Solver s, if(ret == 0) { // Smooth the jitter of measurements: - // If the 1st probe is NOT too bad (measured time <= 1.10 * worst sample of the best - // config), then gather 9 more samples, and remove positive z-score outliers. Use - // the mean value with outliers removed for calculating best config. + // Early-stop strategy: If the minimum of 2 initial tests is within 1.2x of the + // worst sample of the best config, continue with 8 more samples (total 10). + // The 1.2x threshold (vs original 1.1x) accounts for initial test variance. + // Remove positive z-score outliers and use the mean for calculating best config. constexpr int N_RUNS = 10; + constexpr float EARLY_STOP_THRESHOLD = 1.20f; last_imprv++; - if(elapsed_time / worst_time < 1.10f) + if(elapsed_time / worst_time < EARLY_STOP_THRESHOLD) { - MIOPEN_LOG_I2("Finding average for: " << elapsed_time << " / " << best_time - << " = " << (elapsed_time / best_time)); + MIOPEN_LOG_I2("Initial test passed (" << elapsed_time << " / " << worst_time + << " = " << (elapsed_time / worst_time) + << " < " << EARLY_STOP_THRESHOLD + << "), continuing with 8 more samples"); try { - for(int i = 1; i < N_RUNS; ++i) + // Continue with 8 more samples (we already have 2 initial samples) + for(int i = 2; i < N_RUNS; ++i) { invoker(profile_h, invoke_ctx); samples.push_back(profile_h.GetKernelTime()); @@ -637,6 +655,13 @@ auto GenericSearch(const Solver s, } } + else + { + MIOPEN_LOG_I2("Configuration discarded by early-stop: " << elapsed_time << " / " + << worst_time << " = " + << (elapsed_time / worst_time) + << " >= " << EARLY_STOP_THRESHOLD); + } if(perf_sols) { perf_sols->push_back({current_config.ToString(), elapsed_time});