From 649de85c270a26f5922aca17886c433c2eab1ec1 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Thu, 26 Oct 2023 12:02:37 +0200 Subject: [PATCH 01/12] Implemented fused find Cleaned up transpose Removed wrong declaration Implemented fused find result sorting Implemented fused find 2.0 find Removed redundant visitor Split logging command to methods in solution --- src/fusion.cpp | 55 ++++--- src/include/miopen/fusion_plan.hpp | 2 + src/include/miopen/problem.hpp | 34 +++- src/include/miopen/solution.hpp | 13 +- src/problem.cpp | 253 ++++++++++++++++++++++------- src/solution.cpp | 66 +++++--- 6 files changed, 312 insertions(+), 111 deletions(-) diff --git a/src/fusion.cpp b/src/fusion.cpp index 2005e45eab..169a55d360 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -120,14 +120,12 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle, } static auto -AllocateBuffersAndMakeFusionInvokeParams(const FusionContext& context, +AllocateBuffersAndMakeFusionInvokeParams(Handle& handle, const FusionDescription& problem, std::vector& invoke_bufs, miopen::OperatorArgs& params, const FusionPlanDescriptor& plan) { - auto& handle = context.GetStream(); - const auto allocate_buffer = [&](std::size_t size) { auto ptr = handle.Create(size); auto ret = ptr.get(); @@ -761,47 +759,47 @@ static const std::vector>& GetFusionSolverFinder return finders; } -miopenStatus_t FusionPlanDescriptor::Compile(Handle& handle) +static std::vector +FindFusion(const ExecutionContext& ctx, + const FusionDescription& fusion_problem, + const std::function& invoke_params) { - auto fusion_ctx = FusionContext{handle}; - auto fusion_problem = FusionDescription{this}; - const FindEnforce enforce; - - // sols is a collection of ConvSolutions that have been returned from Find for the - // fusion_problem. These ConvSolutions store instructions on how to build kernels and an invoker - // factory. - std::vector sols; - - auto find_results = UserFindDbRecord::TryLoad( - handle, + return UserFindDbRecord::TryLoad( + ctx.GetStream(), fusion_problem, [&](DbRecord& record) { // fusion_ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(fusion_ctx); - // We need buffers for find, thus we allocate them. - miopen::OperatorArgs params; - std::vector invoke_bufs; - const auto invoke_params = AllocateBuffersAndMakeFusionInvokeParams( - fusion_ctx, fusion_problem, invoke_bufs, params, *this); - - FindCore(invoke_params, + // We need buffers for find, thus we lazily get them, possibly allocating. + FindCore(invoke_params(), record, - fusion_ctx, + ctx, fusion_problem, FusionFindParameters{}, GetFusionSolverFinders()); }, "fusion"); +} - const auto network_config = fusion_problem.MakeNetworkConfig(); +miopenStatus_t FusionPlanDescriptor::Compile(Handle& handle) +{ + std::vector invoke_bufs; + miopen::OperatorArgs params; + + const auto find_results = Find(handle, [&]() { + return AllocateBuffersAndMakeFusionInvokeParams( + handle, FusionDescription{this}, invoke_bufs, params, *this); + }); + + const auto network_config = FusionDescription{this}.MakeNetworkConfig(); for(const auto& result : find_results) { if(conv_fwd_algo && result.algorithm != "fusion" && miopen::StringToConvolutionFwdAlgo(result.algorithm) != *conv_fwd_algo) continue; - const auto id = solver::Id{result.solver_id}; + const auto id = solver::Id{result.solver_id}; const auto invoker = handle.GetInvoker(network_config, id); if(!invoker) @@ -823,6 +821,13 @@ miopenStatus_t FusionPlanDescriptor::Compile(Handle& handle) return miopenStatusSuccess; } +std::vector +FusionPlanDescriptor::Find(Handle& handle, + const std::function& invoke_params) const +{ + return FindFusion(&handle, this, invoke_params); +} + miopenStatus_t FusionPlanDescriptor::Execute(const Handle& handle, const TensorDescriptor& inputDesc, ConstData_t input, diff --git a/src/include/miopen/fusion_plan.hpp b/src/include/miopen/fusion_plan.hpp index c5bf150e88..f72a198580 100644 --- a/src/include/miopen/fusion_plan.hpp +++ b/src/include/miopen/fusion_plan.hpp @@ -57,6 +57,8 @@ struct FusionPlanDescriptor : miopenFusionPlanDescriptor Data_t output, const OperatorArgs& op_args); miopenStatus_t Compile(Handle& handle); + std::vector + Find(Handle& handle, const std::function& invoke_params) const; friend std::ostream& operator<<(std::ostream& stream, const FusionPlanDescriptor& fpd); miopenStatus_t diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index 150fc1d4d4..b27fa7bbb5 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -138,15 +138,14 @@ struct Problem std::unordered_map tensor_descriptors; OperatorDescriptor operator_descriptor; - using AllocatedBuffers = std::unordered_map; + using Buffers = std::unordered_map; std::vector FindSolutionsImpl(Handle& handle, const FindOptions& options, std::size_t max_solutions, - const AllocatedBuffers& buffers, + const Buffers& buffers, const ConvolutionDescriptor& conv_desc) const; - void TransposeImpl(const ConvolutionDescriptor& conv_desc); void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const; void LogDriverCommand(const ActivationDescriptor& descriptor) const; }; @@ -160,19 +159,38 @@ struct FusedProblem // Not implemented, but silently } - std::vector FindSolutions(Handle& /*handle*/, - const FindOptions& /*options*/, - std::size_t /*max_solutions*/) const + [[nodiscard]] std::vector + FindSolutions(Handle& handle, const FindOptions& options, std::size_t max_solutions) const; + + void PropagateDescriptors(); + + [[nodiscard]] miopenTensorArgumentId_t GetInputId() const { - MIOPEN_THROW(miopenStatusNotImplemented); + return problems.front().GetInputId(); } - void PropagateDescriptors(); + [[nodiscard]] miopenTensorArgumentId_t GetOutputId() const + { + return problems.back().GetOutputId(); + } + + friend void to_json(nlohmann::json& j, const FusedProblem& problem); + friend void from_json(const nlohmann::json& j, FusedProblem& problem); + +private: + static void AddProblemToPlan(struct FusionPlanDescriptor& plan, const Problem& problem); }; struct ProblemContainer : miopenProblem { boost::variant item; + + ProblemContainer() = default; + ProblemContainer(boost::variant item_) + : item(std::move(item_)) {} // NOLINT(*-explicit-constructor) + + friend void to_json(nlohmann::json& j, const ProblemContainer& problem); + friend void from_json(const nlohmann::json& j, ProblemContainer& problem); }; } // namespace miopen diff --git a/src/include/miopen/solution.hpp b/src/include/miopen/solution.hpp index e1acc9f5a7..1bd1018fb4 100644 --- a/src/include/miopen/solution.hpp +++ b/src/include/miopen/solution.hpp @@ -83,8 +83,8 @@ struct Solution : miopenSolution const solver::Id& GetSolver() const { return solver; } void SetSolver(solver::Id value) { solver = value; } void SetPerfConfig(const std::optional& cfg) { perf_cfg = cfg; } - const Problem& GetProblem() const { return problem; } - void SetProblem(Problem value) { problem = std::move(value); } + const ProblemContainer& GetProblem() const { return problem; } + void SetProblem(ProblemContainer value) { problem = std::move(value); } void Run(Handle& handle, const std::unordered_map& inputs, @@ -100,7 +100,7 @@ struct Solution : miopenSolution float time = 0; std::size_t workspace_required = 0; solver::Id solver; - Problem problem; + ProblemContainer problem; std::optional perf_cfg = std::nullopt; void RunImpl(Handle& handle, @@ -110,7 +110,12 @@ struct Solution : miopenSolution const ConvolutionDescriptor& conv_desc); static Problem Transpose(const Problem& problem, RunInput* x, const RunInput& w, RunInput* y); - void LogDriverCommand(const ConvolutionDescriptor& conv_desc) const; + + void LogDriverCommand(const ConvolutionDescriptor& desc) const; + void LogDriverCommand(const ActivationDescriptor& desc) const; + + void LogDriverCommand(const Problem& problem_) const; + void LogDriverCommand(const FusedProblem& problem_) const; }; } // namespace miopen diff --git a/src/problem.cpp b/src/problem.cpp index ac7c15b4be..4c56937ddb 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -27,14 +27,16 @@ #include #include +#include #include #include #include #include #include +#include #include -#include #include +#include #include #include #include @@ -111,58 +113,74 @@ void VisitType(int id, Args... args) detail::VisitType{}(id, args...); } -std::vector -Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t max_solutions) const +static Data_t AllocateTensor(Handle& handle, + const FindOptions& options, + std::vector& owned, + std::unordered_map& all, + miopenTensorArgumentId_t id, + const TensorDescriptor& descriptor) { - auto owned_buffers = std::vector{}; - auto buffers = std::unordered_map{}; + const auto preallocated = options.preallocated_tensors.find(id); - for(const auto& pair : tensor_descriptors) + if(preallocated != options.preallocated_tensors.end()) { - const auto preallocated = options.preallocated_tensors.find(pair.first); - - if(preallocated != options.preallocated_tensors.end()) - { - buffers.emplace(pair.first, preallocated->second); - continue; - } - - const auto& descriptor = pair.second; - const auto element_size = get_data_size(descriptor.GetType()); - auto buffer = handle.Create(descriptor.GetElementSpace() * element_size); + all.emplace(id, preallocated->second); + return preallocated->second; + } - visit_float(descriptor.GetType(), [&](auto as_float) { - const auto zero = as_float(0.f); - SetTensor(handle, descriptor, buffer.get(), &zero); - }); + const auto element_size = get_data_size(descriptor.GetType()); + auto buffer = handle.Create(descriptor.GetElementSpace() * element_size); - buffers.emplace(pair.first, buffer.get()); - owned_buffers.emplace_back(std::move(buffer)); - } + visit_float(descriptor.GetType(), [&](auto as_float) { + const auto zero = as_float(0.f); + SetTensor(handle, descriptor, buffer.get(), &zero); + }); - const auto find = boost::hof::match( - [&](const ConvolutionDescriptor& op_desc) { - return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); - }, - [&](const ActivationDescriptor& /*op_desc*/) -> std::vector { - MIOPEN_THROW(miopenStatusNotImplemented); - }); + const auto allocated = buffer.get(); + all.emplace(id, allocated); + owned.emplace_back(std::move(buffer)); + return allocated; +} - auto ret = boost::apply_visitor(find, operator_descriptor); +static void SortFindResults(const FindOptions& options, std::vector& results) +{ + std::sort(results.begin(), + results.end(), + [&]() -> std::function { + switch(options.results_order) + { + case miopenFindResultsOrderByTime: + return [](auto&& l, auto&& r) { return l.GetTime() < r.GetTime(); }; + case miopenFindResultsOrderByWorkspaceSize: + return [](auto&& l, auto&& r) { + return l.GetWorkspaceSize() < r.GetWorkspaceSize(); + }; + } + MIOPEN_THROW(miopenStatusNotImplemented); + }()); +} - const auto sorter = [&]() -> std::function { - switch(options.results_order) - { - case miopenFindResultsOrderByTime: - return [](auto&& l, auto&& r) { return l.GetTime() < r.GetTime(); }; - case miopenFindResultsOrderByWorkspaceSize: - return [](auto&& l, auto&& r) { return l.GetWorkspaceSize() < r.GetWorkspaceSize(); }; - } - MIOPEN_THROW(miopenStatusNotImplemented); - }(); +std::vector +Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t max_solutions) const +{ + auto owned_buffers = std::vector{}; + auto buffers = std::unordered_map{}; - std::sort(ret.begin(), ret.end(), sorter); + for(const auto& pair : tensor_descriptors) + AllocateTensor(handle, options, owned_buffers, buffers, pair.first, pair.second); + + auto ret = boost::apply_visitor( + boost::hof::match( + [&](const ConvolutionDescriptor& op_desc) { + return FindSolutionsImpl(handle, options, max_solutions, buffers, op_desc); + }, + [&](const ActivationDescriptor& /*op_desc*/) -> std::vector { + MIOPEN_THROW(miopenStatusNotImplemented); + }), + operator_descriptor); + owned_buffers.resize(0); + SortFindResults(options, ret); return ret; } @@ -209,21 +227,12 @@ Problem Problem::MakeTransposed() const for(const auto& descriptor : tensor_descriptors) transposed.tensor_descriptors.emplace(descriptor.first, descriptor.second); - const auto transpose_tensors = boost::hof::match( - [&](const ConvolutionDescriptor& op_desc) { return transposed.TransposeImpl(op_desc); }, - [&](const ActivationDescriptor& /*op_desc*/) { MIOPEN_THROW(miopenStatusNotImplemented); }); - - boost::apply_visitor(transpose_tensors, operator_descriptor); + std::swap(transposed.tensor_descriptors.at(GetInputId()), + transposed.tensor_descriptors.at(GetOutputId())); return transposed; } -void Problem::TransposeImpl(const ConvolutionDescriptor& /*conv_desc*/) -{ - std::swap(tensor_descriptors.at(miopenTensorConvolutionX), - tensor_descriptors.at(miopenTensorConvolutionY)); -} - conv::ProblemDescription Problem::AsConvolution() const { const auto& conv_desc = boost::get(operator_descriptor); @@ -265,7 +274,7 @@ activ::ProblemDescription Problem::AsActivation() const std::vector Problem::FindSolutionsImpl(Handle& handle, const FindOptions& options, std::size_t max_solutions, - const AllocatedBuffers& buffers, + const Buffers& buffers, const ConvolutionDescriptor& conv_desc) const { auto ret = std::vector{}; @@ -496,6 +505,18 @@ void from_json(const nlohmann::json& json, Problem& problem) primitive, &operator_json, &problem.operator_descriptor); } +void to_json(nlohmann::json& json, const FusedProblem& problem) +{ + json = nlohmann::json{ + {"problems", problem.problems}, + }; +} + +void from_json(const nlohmann::json& json, FusedProblem& problem) +{ + json.at("problems").get_to(problem.problems); +} + void Problem::CalculateOutput() { if(!HasInput()) @@ -550,4 +571,126 @@ void FusedProblem::PropagateDescriptors() } } +std::vector FusedProblem::FindSolutions(Handle& handle, + const FindOptions& options, + std::size_t max_solutions) const +{ + auto owned = std::vector{}; + + FusionPlanDescriptor plan; + + for(const auto& problem : problems) + AddProblemToPlan(plan, problem); + + miopen::OperatorArgs params; + + const auto make_invoke_params = [&]() { + auto buffers = std::unordered_map{}; + auto& in_desc = problems.front().GetInput(); + auto& out_desc = problems.back().GetOutput(); + Data_t in, out; + + { + auto allocate = [&](auto id, auto&& desc) { + return AllocateTensor(handle, options, owned, buffers, id, desc); + }; + + in = allocate(GetInputId(), in_desc); + out = allocate(GetOutputId(), out_desc); + + for(const auto& problem : problems) + for(const auto& pair : problem.tensor_descriptors) + if(pair.first != problem.GetInputId() && pair.first != problem.GetOutputId()) + allocate(pair.first, pair.second); + } + + bool gfx90aaltimpl = false; + + for(const auto& problem : problems) + { + boost::apply_visitor( + boost::hof::match( + [&](const ConvolutionDescriptor& conv_desc) { + gfx90aaltimpl = conv_desc.attribute.gfx90aFp16alt.GetFwd(); + + const auto wei_ptr = buffers.at(miopenTensorConvolutionW); + params.params.emplace_back( + std::make_unique(wei_ptr)); + }, + [&](const ActivationDescriptor& activ_desc) { + const auto alpha = activ_desc.GetAlpha(); + const auto beta = activ_desc.GetBeta(); + const auto gamma = activ_desc.GetGamma(); + + if(problem.GetDirection() == miopenProblemDirectionForward) + { + params.params.emplace_back( + std::make_unique( + alpha, beta, gamma)); + } + else + { + const auto x = buffers.at(miopenTensorActivationX); + const auto y = buffers.at(miopenTensorActivationY); + + params.params.emplace_back( + std::make_unique( + y, x, alpha, beta, gamma)); + } + }), + problem.operator_descriptor); + } + + return fusion::FusionInvokeParams(params, in_desc, in, out_desc, out, gfx90aaltimpl); + }; + + auto find1_solutions = plan.Find(handle, make_invoke_params); + owned.resize(0); + + auto ret = std::vector{}; + ret.reserve(find1_solutions.size()); + + // decltype(auto) db = GetDb(ExecutionContext{&handle}); + + for(auto i = 0; i < ret.size(); ++i) + { + auto solution = Solution{}; + solution.SetTime(find1_solutions[i].time); + solution.SetWorkspaceSize(find1_solutions[i].workspace); + solution.SetSolver(find1_solutions[i].solver_id); + solution.SetProblem({*this}); + // solution.SetPerfConfig(solution.GetSolver().GetSolver().GetPerfCfgParams(conv_ctx, + // legacy_problem, db)); + MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , " + << solution.GetWorkspaceSize() << ", " + << solution.GetTime()); + + ret.emplace_back(std::move(solution)); + } + + SortFindResults(options, ret); + ret.resize(std::min(ret.size(), max_solutions)); + + return ret; +} + +void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& problem) +{ + boost::apply_visitor( + boost::hof::match( + [&](const ConvolutionDescriptor& conv_desc) { + plan.AddOp(std::make_shared( + conv_desc, + problem.GetTensorDescriptorChecked(miopenTensorConvolutionW, + "miopenTensorConvolutionW"))); + }, + [&](const ActivationDescriptor& activ_desc) { + if(problem.GetDirection() == miopenProblemDirectionForward) + plan.AddOp(std::make_shared(activ_desc.GetMode())); + else + plan.AddOp(std::make_shared(activ_desc.GetMode())); + }), + problem.operator_descriptor); +} + } // namespace miopen diff --git a/src/solution.cpp b/src/solution.cpp index aa4907079f..01e141aee2 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -58,34 +58,60 @@ void Solution::Run(Handle& handle, std::to_string(workspace_required) + " workspace, while " + std::to_string(workspace_size) + " was provided"); - const auto run = boost::hof::match( - [&](const ConvolutionDescriptor& op_desc) { - RunImpl(handle, inputs, workspace, workspace_size, op_desc); - }, - [&](const ActivationDescriptor& /*op_desc*/) { MIOPEN_THROW(miopenStatusNotImplemented); }); - - boost::apply_visitor(run, problem.GetOperatorDescriptor()); + boost::apply_visitor( + boost::hof::match( + [&](const Problem& problem_) { + boost::apply_visitor( + boost::hof::match( + [&](const ConvolutionDescriptor& op_desc) { + RunImpl(handle, inputs, workspace, workspace_size, op_desc); + }, + [&](const ActivationDescriptor& /*op_desc*/) { + MIOPEN_THROW(miopenStatusNotImplemented); + }), + problem_.GetOperatorDescriptor()); + }, + [](const FusedProblem& problem_) { + std::ignore = problem_; + MIOPEN_THROW(miopenStatusNotImplemented); + }), + problem.item); } void Solution::LogDriverCommand() const { - const auto log_function = boost::hof::match( - [&](const ConvolutionDescriptor& op_desc) { return LogDriverCommand(op_desc); }, - [&](const ActivationDescriptor& /*op_desc*/) { MIOPEN_THROW(miopenStatusNotImplemented); }); - - boost::apply_visitor(log_function, problem.GetOperatorDescriptor()); + boost::apply_visitor([&](const auto& problem_) { LogDriverCommand(problem_); }, problem.item); } -void Solution::LogDriverCommand(const ConvolutionDescriptor& conv_desc) const +void Solution::LogDriverCommand(const ConvolutionDescriptor& desc) const { + auto problem_ = boost::get(problem.item); const auto& x_desc = - problem.GetTensorDescriptorChecked(miopenTensorConvolutionX, "miopenTensorConvolutionX"); + problem_.GetTensorDescriptorChecked(miopenTensorConvolutionX, "miopenTensorConvolutionX"); const auto& w_desc = - problem.GetTensorDescriptorChecked(miopenTensorConvolutionW, "miopenTensorConvolutionW"); + problem_.GetTensorDescriptorChecked(miopenTensorConvolutionW, "miopenTensorConvolutionW"); const auto& y_desc = - problem.GetTensorDescriptorChecked(miopenTensorConvolutionY, "miopenTensorConvolutionY"); + problem_.GetTensorDescriptorChecked(miopenTensorConvolutionY, "miopenTensorConvolutionY"); miopen::debug::LogCmdConvolution( - x_desc, w_desc, conv_desc, y_desc, problem.GetDirection(), solver.Value()); + x_desc, w_desc, desc, y_desc, problem_.GetDirection(), solver.Value()); +} + +void Solution::LogDriverCommand(const ActivationDescriptor& desc) const +{ + std::ignore = desc; + MIOPEN_THROW(miopenStatusNotImplemented); +} + +void Solution::LogDriverCommand(const Problem& problem_) const +{ + boost::apply_visitor([&](const auto& op_desc) { LogDriverCommand(op_desc); }, + problem_.GetOperatorDescriptor()); +} + +void Solution::LogDriverCommand(const FusedProblem& problem_) const +{ + std::ignore = problem_; + MIOPEN_THROW(miopenStatusNotImplemented); } void Solution::RunImpl(Handle& handle, @@ -94,6 +120,8 @@ void Solution::RunImpl(Handle& handle, std::size_t workspace_size, const ConvolutionDescriptor& conv_desc) { + const auto& problem_casted = boost::get(problem.item); + const auto get_input_checked = [&](auto name, const std::string& name_str) { const auto& found = inputs.find(name); if(found == inputs.end()) @@ -101,7 +129,7 @@ void Solution::RunImpl(Handle& handle, "Problem is missing " + name_str + " tensor descriptor."); auto ret = found->second; if(!ret.descriptor.has_value()) - ret.descriptor = GetProblem().GetTensorDescriptorChecked(name, name_str); + ret.descriptor = problem_casted.GetTensorDescriptorChecked(name, name_str); return ret; }; @@ -110,7 +138,7 @@ void Solution::RunImpl(Handle& handle, auto y = get_input_checked(miopenTensorConvolutionY, "miopenTensorConvolutionY"); const auto problem_ = - conv_desc.mode == miopenTranspose ? Transpose(GetProblem(), &x, w, &y) : GetProblem(); + conv_desc.mode == miopenTranspose ? Transpose(problem_casted, &x, w, &y) : problem_casted; if(problem_.GetDirection() == miopenProblemDirectionBackward && y.descriptor->GetLengths()[1] != w.descriptor->GetLengths()[0]) From b2275616d5edeecf90be4a8b6287edb60aa30ca0 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Tue, 31 Oct 2023 15:43:43 +0100 Subject: [PATCH 02/12] RunSolution for FusedProblem implementation --- src/fusion.cpp | 23 ++++ src/include/miopen/find_solution.hpp | 23 ++++ src/include/miopen/fusion.hpp | 6 + src/include/miopen/problem.hpp | 10 ++ src/include/miopen/solution.hpp | 6 + src/problem.cpp | 168 ++++++++++++++------------- src/solution.cpp | 49 +++++++- 7 files changed, 202 insertions(+), 83 deletions(-) diff --git a/src/fusion.cpp b/src/fusion.cpp index 169a55d360..ed94149ab3 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -705,6 +705,29 @@ static auto GetFusedWinogradSolvers() solver::fusion::ConvBinWinogradRxSf2x3g1Fused>{}; } +static auto GetAllFusionSolvers() +{ + return GetFusedNonConvSolvers() + GetFusedDirectSolvers() + GetFusedIGemmSolvers() + + GetFusedWinogradSolvers(); +} + +solver::ConvSolution MakeFusedSolution(const FusionContext& ctx, + solver::Id id, + const std::optional& perf_cfg_override, + const FusionDescription& problem, + const AnyInvokeParams& invoke_params) +{ + decltype(auto) db = GetDb(ctx); + solver::ConvSolution solution{miopenStatusInternalError}; + + GetAllFusionSolvers().FindById(id, [&](auto solver) { + solution = miopen::solver::FindSolution( + solver, ctx, problem, db, invoke_params, perf_cfg_override.value_or("")); + }); + + return solution; +} + struct FusionFindParameters : PrimitiveFindParameters { }; diff --git a/src/include/miopen/find_solution.hpp b/src/include/miopen/find_solution.hpp index fd65f55d4c..519f1de608 100644 --- a/src/include/miopen/find_solution.hpp +++ b/src/include/miopen/find_solution.hpp @@ -173,6 +173,29 @@ ConvSolution FindSolution(Solver s, template struct SolverContainer { + template + auto operator+(SolverContainer) const + { + return SolverContainer{}; + } + + ///\todo: remove when AnySolver would be able to work with non-conv solvers + template + void FindById(solver::Id id, Functor&& receiver) + { + bool found = false; + + miopen::each_args( + [&](auto solver) { + if(found || id != solver::Id{solver.SolverDbId()}) + return; + + found = true; + receiver(solver); + }, + Solvers{}...); + } + // Search for all applicable solutions among many solvers template std::vector diff --git a/src/include/miopen/fusion.hpp b/src/include/miopen/fusion.hpp index f00e5d803a..f5738efa8e 100644 --- a/src/include/miopen/fusion.hpp +++ b/src/include/miopen/fusion.hpp @@ -253,6 +253,12 @@ miopenStatus_t ConvBiasActivFusion(Handle& handle, const TensorDescriptor& yDesc, Data_t y); +solver::ConvSolution MakeFusedSolution(const struct FusionContext& ctx, + solver::Id id, + const std::optional& perf_cfg_override, + const struct FusionDescription& problem, + const AnyInvokeParams& invoke_params); + } // namespace miopen MIOPEN_DEFINE_OBJECT(miopenFusionOpDescriptor, miopen::FusionOpDescriptor); MIOPEN_DEFINE_OBJECT(miopenOperatorArgs, miopen::OperatorArgs); diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index b27fa7bbb5..e863bb4776 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -41,6 +41,9 @@ #include #include +#include "miopen/fusion/fusion_op_args.hpp" +#include "miopen/fusion/fusion_invoke_params.hpp" +#include "fusion_plan.hpp" namespace miopen { @@ -174,9 +177,16 @@ struct FusedProblem return problems.back().GetOutputId(); } + [[nodiscard]] FusionPlanDescriptor AsFusionPlan() const; + friend void to_json(nlohmann::json& j, const FusedProblem& problem); friend void from_json(const nlohmann::json& j, FusedProblem& problem); + [[nodiscard]] fusion::FusionInvokeParams + MakeInvokeParams(const std::function& + buffer_getter, + OperatorArgs& operator_args) const; + private: static void AddProblemToPlan(struct FusionPlanDescriptor& plan, const Problem& problem); }; diff --git a/src/include/miopen/solution.hpp b/src/include/miopen/solution.hpp index 1bd1018fb4..2001772f3a 100644 --- a/src/include/miopen/solution.hpp +++ b/src/include/miopen/solution.hpp @@ -109,6 +109,12 @@ struct Solution : miopenSolution std::size_t workspace_size, const ConvolutionDescriptor& conv_desc); + void RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t workspace, + std::size_t workspace_size, + const FusedProblem& problem_); + static Problem Transpose(const Problem& problem, RunInput* x, const RunInput& w, RunInput* y); void LogDriverCommand(const ConvolutionDescriptor& desc) const; diff --git a/src/problem.cpp b/src/problem.cpp index 4c56937ddb..37a8d33cb3 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -116,17 +116,13 @@ void VisitType(int id, Args... args) static Data_t AllocateTensor(Handle& handle, const FindOptions& options, std::vector& owned, - std::unordered_map& all, miopenTensorArgumentId_t id, const TensorDescriptor& descriptor) { const auto preallocated = options.preallocated_tensors.find(id); if(preallocated != options.preallocated_tensors.end()) - { - all.emplace(id, preallocated->second); return preallocated->second; - } const auto element_size = get_data_size(descriptor.GetType()); auto buffer = handle.Create(descriptor.GetElementSpace() * element_size); @@ -137,7 +133,6 @@ static Data_t AllocateTensor(Handle& handle, }); const auto allocated = buffer.get(); - all.emplace(id, allocated); owned.emplace_back(std::move(buffer)); return allocated; } @@ -166,8 +161,14 @@ Problem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t m auto owned_buffers = std::vector{}; auto buffers = std::unordered_map{}; + const auto allocate = [&](auto id, auto&& descriptor) { + auto buffer = AllocateTensor(handle, options, owned_buffers, id, descriptor); + buffers.emplace(id, buffer); + return buffer; + }; + for(const auto& pair : tensor_descriptors) - AllocateTensor(handle, options, owned_buffers, buffers, pair.first, pair.second); + allocate(pair.first, pair.second); auto ret = boost::apply_visitor( boost::hof::match( @@ -575,89 +576,31 @@ std::vector FusedProblem::FindSolutions(Handle& handle, const FindOptions& options, std::size_t max_solutions) const { - auto owned = std::vector{}; - - FusionPlanDescriptor plan; - - for(const auto& problem : problems) - AddProblemToPlan(plan, problem); - - miopen::OperatorArgs params; + const auto find1_solutions = [&]() { + OperatorArgs params; + auto owned_buffers = std::vector{}; - const auto make_invoke_params = [&]() { - auto buffers = std::unordered_map{}; - auto& in_desc = problems.front().GetInput(); - auto& out_desc = problems.back().GetOutput(); - Data_t in, out; - - { - auto allocate = [&](auto id, auto&& desc) { - return AllocateTensor(handle, options, owned, buffers, id, desc); + const auto make_invoke_params = [&]() { + auto buffer_allocator = [&](auto id, auto&& desc) { + return AllocateTensor(handle, options, owned_buffers, id, desc); }; - in = allocate(GetInputId(), in_desc); - out = allocate(GetOutputId(), out_desc); - - for(const auto& problem : problems) - for(const auto& pair : problem.tensor_descriptors) - if(pair.first != problem.GetInputId() && pair.first != problem.GetOutputId()) - allocate(pair.first, pair.second); - } - - bool gfx90aaltimpl = false; - - for(const auto& problem : problems) - { - boost::apply_visitor( - boost::hof::match( - [&](const ConvolutionDescriptor& conv_desc) { - gfx90aaltimpl = conv_desc.attribute.gfx90aFp16alt.GetFwd(); - - const auto wei_ptr = buffers.at(miopenTensorConvolutionW); - params.params.emplace_back( - std::make_unique(wei_ptr)); - }, - [&](const ActivationDescriptor& activ_desc) { - const auto alpha = activ_desc.GetAlpha(); - const auto beta = activ_desc.GetBeta(); - const auto gamma = activ_desc.GetGamma(); - - if(problem.GetDirection() == miopenProblemDirectionForward) - { - params.params.emplace_back( - std::make_unique( - alpha, beta, gamma)); - } - else - { - const auto x = buffers.at(miopenTensorActivationX); - const auto y = buffers.at(miopenTensorActivationY); - - params.params.emplace_back( - std::make_unique( - y, x, alpha, beta, gamma)); - } - }), - problem.operator_descriptor); - } - - return fusion::FusionInvokeParams(params, in_desc, in, out_desc, out, gfx90aaltimpl); - }; + return MakeInvokeParams(buffer_allocator, params); + }; - auto find1_solutions = plan.Find(handle, make_invoke_params); - owned.resize(0); + return AsFusionPlan().Find(handle, make_invoke_params); + }(); auto ret = std::vector{}; ret.reserve(find1_solutions.size()); - // decltype(auto) db = GetDb(ExecutionContext{&handle}); - for(auto i = 0; i < ret.size(); ++i) + for(const auto& find1_solution : find1_solutions) { auto solution = Solution{}; - solution.SetTime(find1_solutions[i].time); - solution.SetWorkspaceSize(find1_solutions[i].workspace); - solution.SetSolver(find1_solutions[i].solver_id); + solution.SetTime(find1_solution.time); + solution.SetWorkspaceSize(find1_solution.workspace); + solution.SetSolver(find1_solution.solver_id); solution.SetProblem({*this}); // solution.SetPerfConfig(solution.GetSolver().GetSolver().GetPerfCfgParams(conv_ctx, // legacy_problem, db)); @@ -670,7 +613,6 @@ std::vector FusedProblem::FindSolutions(Handle& handle, SortFindResults(options, ret); ret.resize(std::min(ret.size(), max_solutions)); - return ret; } @@ -693,4 +635,72 @@ void FusedProblem::AddProblemToPlan(FusionPlanDescriptor& plan, const Problem& p problem.operator_descriptor); } +fusion::FusionInvokeParams FusedProblem::MakeInvokeParams( + const std::function& buffer_getter, + OperatorArgs& operator_args) const +{ + auto buffers = std::unordered_map{}; + auto& in_desc = problems.front().GetInput(); + auto& out_desc = problems.back().GetOutput(); + + const auto get_buffer = [&](auto id, auto&& descriptor) { + auto buffer = buffer_getter(id, descriptor); + buffers.emplace(id, buffer); + return buffer; + }; + + bool gfx90aaltimpl = false; + auto in = get_buffer(GetInputId(), in_desc); + auto out = get_buffer(GetOutputId(), out_desc); + + for(const auto& problem : problems) + { + for(const auto& pair : problem.tensor_descriptors) + if(pair.first != problem.GetInputId() && pair.first != problem.GetOutputId()) + get_buffer(pair.first, pair.second); + + boost::apply_visitor( + boost::hof::match( + [&](const ConvolutionDescriptor& conv_desc) { + gfx90aaltimpl = conv_desc.attribute.gfx90aFp16alt.GetFwd(); + + const auto wei_ptr = buffers.at(miopenTensorConvolutionW); + operator_args.params.emplace_back( + std::make_unique(wei_ptr)); + }, + [&](const ActivationDescriptor& activ_desc) { + const auto alpha = activ_desc.GetAlpha(); + const auto beta = activ_desc.GetBeta(); + const auto gamma = activ_desc.GetGamma(); + + if(problem.GetDirection() == miopenProblemDirectionForward) + { + operator_args.params.emplace_back( + std::make_unique( + alpha, beta, gamma)); + } + else + { + const auto x = buffers.at(miopenTensorActivationX); + const auto y = buffers.at(miopenTensorActivationY); + + operator_args.params.emplace_back( + std::make_unique( + y, x, alpha, beta, gamma)); + } + }), + problem.operator_descriptor); + } + + return fusion::FusionInvokeParams(operator_args, in_desc, in, out_desc, out, gfx90aaltimpl); +} + +FusionPlanDescriptor FusedProblem::AsFusionPlan() const +{ + FusionPlanDescriptor plan; + for(const auto& problem : problems) + AddProblemToPlan(plan, problem); + return plan; +} + } // namespace miopen diff --git a/src/solution.cpp b/src/solution.cpp index 01e141aee2..3288e90c72 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -34,6 +34,8 @@ #include #include +#include "miopen/fusion/problem_description.hpp" +#include "miopen/fusion/context.hpp" namespace miopen::debug { // Todo: This should be updated when a separate driver command is implemented @@ -71,9 +73,8 @@ void Solution::Run(Handle& handle, }), problem_.GetOperatorDescriptor()); }, - [](const FusedProblem& problem_) { - std::ignore = problem_; - MIOPEN_THROW(miopenStatusNotImplemented); + [&](const FusedProblem& problem_) { + RunImpl(handle, inputs, workspace, workspace_size, problem_); }), problem.item); } @@ -210,7 +211,7 @@ void Solution::RunImpl(Handle& handle, auto conv_ctx = ExecutionContext{&handle}; conv_problem.SetupFloats(conv_ctx); - decltype(auto) db = GetDb(conv_ctx); + decltype(auto) db = GetDb(ctx); const auto conv_solution = GetSolver().GetSolver().FindSolution( conv_ctx, conv_problem, db, invoke_ctx, perf_cfg.value_or("")); decltype(auto) invoker = @@ -220,6 +221,46 @@ void Solution::RunImpl(Handle& handle, checkNumericsOutput_(); } +void Solution::RunImpl(Handle& handle, + const std::unordered_map& inputs, + Data_t /*workspace*/, + std::size_t /*workspace_size*/, + const FusedProblem& problem_) +{ + const auto buffer_getter = [&](auto id, auto&& descriptor) { + const auto found = inputs.find(id); + if(found == inputs.end()) + MIOPEN_THROW(miopenStatusInvalidValue, + "Problem is missing " + std::to_string(id) + " tensor descriptor."); + if(found->second.descriptor.has_value() && *found->second.descriptor != descriptor) + MIOPEN_THROW(miopenStatusNotImplemented, + "Providing new descriptors for a fused solution is not supported."); + return found->second.buffer; + }; + + OperatorArgs op_args; + const auto invoke_params = problem_.MakeInvokeParams(buffer_getter, op_args); + + const auto plan = problem_.AsFusionPlan(); + const auto fusion_problem = FusionDescription{&plan}; + const auto net_cfg = fusion_problem.MakeNetworkConfig(); + + const auto found_invoker = handle.GetInvoker(net_cfg, GetSolver()); + + if(found_invoker) + { + (*found_invoker)(handle, invoke_params); + return; + } + + const auto ctx = FusionContext{handle}; + const auto solution = MakeFusedSolution(ctx, solver, perf_cfg, fusion_problem, invoke_params); + decltype(auto) invoker = + handle.PrepareInvoker(*solution.invoker_factory, solution.construction_params); + handle.RegisterInvoker(invoker, net_cfg, GetSolver().ToString()); + invoker(handle, invoke_params); +} + Problem Solution::Transpose(const Problem& problem, RunInput* x, const RunInput& w, RunInput* y) { auto transposed = problem.MakeTransposed(); From 762659a4e20715d4f1a774064386443ed2144fb2 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Thu, 9 Nov 2023 14:02:41 +0100 Subject: [PATCH 03/12] Implemented test_cba_find2_infer --- src/fusion.cpp | 3 +- src/include/miopen/problem.hpp | 3 + src/include/miopen/search_options.hpp | 1 + src/include/miopen/solution.hpp | 2 + src/problem.cpp | 2 + test/gtest/cba_find2.hpp | 170 +++++++++++++++++++++++++ test/gtest/cba_find2_infer.cpp | 173 ++++++++++++++++++++++++++ 7 files changed, 353 insertions(+), 1 deletion(-) create mode 100644 test/gtest/cba_find2.hpp create mode 100644 test/gtest/cba_find2_infer.cpp diff --git a/src/fusion.cpp b/src/fusion.cpp index ed94149ab3..418c3ff995 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -794,9 +794,10 @@ FindFusion(const ExecutionContext& ctx, // fusion_ctx.use_dynamic_solutions_only = findMode.IsDynamicHybrid(fusion_ctx); // We need buffers for find, thus we lazily get them, possibly allocating. + auto fusion_ctx = FusionContext(ctx.GetStream()); FindCore(invoke_params(), record, - ctx, + fusion_ctx, fusion_problem, FusionFindParameters{}, GetFusionSolverFinders()); diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index e863bb4776..2156d88df7 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -177,6 +177,9 @@ struct FusedProblem return problems.back().GetOutputId(); } + [[nodiscard]] const TensorDescriptor& GetInput() const { return problems.front().GetInput(); } + [[nodiscard]] const TensorDescriptor& GetOutput() const { return problems.back().GetOutput(); } + [[nodiscard]] FusionPlanDescriptor AsFusionPlan() const; friend void to_json(nlohmann::json& j, const FusedProblem& problem); diff --git a/src/include/miopen/search_options.hpp b/src/include/miopen/search_options.hpp index bdf60be8d4..0c01dae944 100644 --- a/src/include/miopen/search_options.hpp +++ b/src/include/miopen/search_options.hpp @@ -28,6 +28,7 @@ #include +#include #include #include diff --git a/src/include/miopen/solution.hpp b/src/include/miopen/solution.hpp index 2001772f3a..4fab925bf2 100644 --- a/src/include/miopen/solution.hpp +++ b/src/include/miopen/solution.hpp @@ -74,6 +74,8 @@ struct Solution : miopenSolution if(argument.descriptor != nullptr) descriptor = miopen::deref(*argument.descriptor); } + + inline RunInput(Data_t buffer_) : buffer(buffer_) {} }; float GetTime() const { return time; } diff --git a/src/problem.cpp b/src/problem.cpp index 37a8d33cb3..d614811244 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -698,6 +698,8 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams( FusionPlanDescriptor FusedProblem::AsFusionPlan() const { FusionPlanDescriptor plan; + plan.input_desc = GetInput(); + plan.output_desc = GetOutput(); for(const auto& problem : problems) AddProblemToPlan(plan, problem); return plan; diff --git a/test/gtest/cba_find2.hpp b/test/gtest/cba_find2.hpp new file mode 100644 index 0000000000..0fe00cd070 --- /dev/null +++ b/test/gtest/cba_find2.hpp @@ -0,0 +1,170 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "tensor_util.hpp" +#include "get_handle.hpp" +#include "conv_common.hpp" + +#include "conv_test_base.hpp" +#include "conv_tensor_gen.hpp" + +template +struct ConvBiasActivInferFind2Test + : public ::testing::TestWithParam< + std::tuple>, + ConvFwdSolverTestBase +{ +protected: + ConvTestCase conv_config; + miopen::ActivationDescriptor activ_desc; + tensor bias; + miopen::Allocator::ManageDataPtr bias_dev; + bool test_skipped = false; + miopenActivationMode_t activ_mode; + /* + miopen::FusionPlanDescriptor fusePlanDesc; + */ + miopen::OperatorArgs params; + const float alpha = static_cast(1.0f); + const float beta = static_cast(0); + const float activ_alpha = static_cast(0.5f); + const float activ_beta = static_cast(0.5f); + const float activ_gamma = static_cast(0.5f); + miopenTensorLayout_t tensor_layout; + + miopen::FusedProblem fused_problem; + miopen::AnyInvokeParams invoke_params; + + using cfsb = ConvFwdSolverTestBase; + + void SetUp() override + { + test_skipped = false; + std::tie(activ_mode, conv_config, tensor_layout) = GetParam(); + + cfsb::SetUpImpl(conv_config, tensor_layout); + activ_desc = {activ_mode, activ_alpha, activ_beta, activ_gamma}; + bias = tensor{1, static_cast(conv_config.k), 1, 1}; + bias.generate(tensor_elem_gen_integer{3}); + std::fill( + cfsb::output.begin(), cfsb::output.end(), std::numeric_limits::quiet_NaN()); + bias_dev = get_handle().Write(bias.data); + + // Setup the fusion problem + fused_problem = miopen::FusedProblem{{ + MakeConvProblem(), + // MakeBiasProblem(), + MakeActivationProblem(), + }}; + + fused_problem.PropagateDescriptors(); + ASSERT_NO_THROW(invoke_params = MakeInvokeParams()); + + /* + fusePlanDesc = miopen::FusionPlanDescriptor(miopenVerticalFusion, cfsb::input.desc); + auto convOp = + std::make_shared(cfsb::conv_desc, cfsb::weights.desc); + auto biasOp = std::make_shared(bias.desc); + auto activOp = std::make_shared(activ_desc.GetMode()); + EXPECT_EQ(fusePlanDesc.AddOp(convOp), miopenStatusSuccess); + convOp->SetArgs(params, &alpha, &beta, cfsb::wei_dev.get()); + EXPECT_EQ(fusePlanDesc.AddOp(biasOp), miopenStatusSuccess); + biasOp->SetArgs(params, &alpha, &beta, bias_dev.get()); + EXPECT_EQ(fusePlanDesc.AddOp(activOp), miopenStatusSuccess); + activOp->SetArgs(params, &alpha, &beta, activ_alpha, activ_beta, activ_gamma); + */ + } + + void TearDown() override + { + if(test_skipped) + return; + conv_stats stats; + cfsb::TearDownConv(); + // cpu_bias_forward(cfsb::ref_out, bias); + + activationHostInfer(activ_mode, + activ_gamma, + activ_beta, + activ_alpha, + cfsb::ref_out.data, + cfsb::ref_out.data); + cfsb::ThresholdChecks(); + } + +private: + [[nodiscard]] miopen::Problem MakeConvProblem() const + { + auto problem = miopen::Problem{}; + problem.SetOperatorDescriptor(cfsb::conv_desc); + problem.RegisterTensorDescriptor(miopenTensorConvolutionX, cfsb::input.desc); + problem.RegisterTensorDescriptor(miopenTensorConvolutionW, cfsb::weights.desc); + problem.RegisterTensorDescriptor(miopenTensorConvolutionY, cfsb::output.desc); + return problem; + } + + [[nodiscard]] miopen::Problem MakeActivationProblem() const + { + auto problem = miopen::Problem{}; + problem.SetOperatorDescriptor(activ_desc); + return problem; + } + + [[nodiscard]] miopen::AnyInvokeParams MakeInvokeParams() + { + return fused_problem.MakeInvokeParams( + [&](auto id, const auto& desc) { + if(id == miopenTensorConvolutionX) + { + EXPECT_EQ(desc, cfsb::input.desc); + return cfsb::in_dev.get(); + } + if(id == miopenTensorConvolutionW) + { + EXPECT_EQ(desc, cfsb::weights.desc); + return cfsb::wei_dev.get(); + } + if(id == miopenTensorActivationY) + { + EXPECT_EQ(desc, cfsb::output.desc); + return cfsb::out_dev.get(); + } + MIOPEN_THROW(miopenStatusInternalError); + }, + params); + } +}; diff --git a/test/gtest/cba_find2_infer.cpp b/test/gtest/cba_find2_infer.cpp new file mode 100644 index 0000000000..817277f8a2 --- /dev/null +++ b/test/gtest/cba_find2_infer.cpp @@ -0,0 +1,173 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tensor_util.hpp" +#include "get_handle.hpp" +#include "cba_find2.hpp" +#include "miopen/search_options.hpp" + +struct ConvBiasActivFind2InferTestFloat : ConvBiasActivInferFind2Test +{ +}; + +struct ConvBiasActivFind2InferTestFloatFusionFind : ConvBiasActivInferFind2Test +{ +}; + +struct ConvBiasActivFind2InferTestHalf : ConvBiasActivInferFind2Test +{ +}; + +void setEnvironmentVariable(const std::string& name, const std::string& value) +{ + int ret = 0; + +#ifdef _WIN32 + std::string env_var(name + "=" + value); + ret = _putenv(env_var.c_str()); +#else + ret = setenv(name.c_str(), value.c_str(), 1); +#endif + EXPECT_EQ(ret, 0); +} + +template +void RunSolver(miopen::FusedProblem& problem, + const miopen::AnyInvokeParams& invoke_ctx, + const TestCase& conv_config, + bool& test_skipped) +{ + auto& handle = get_handle(); + Solver solv{}; + const auto plan = problem.AsFusionPlan(); + const auto fusion_problem = miopen::FusionDescription{&plan}; + auto fusion_ctx = miopen::FusionContext{handle}; + if(!solv.IsApplicable(fusion_ctx, fusion_problem)) + { + test_skipped = true; + GTEST_SKIP() << solv.SolverDbId() << " Not Applicable" << conv_config; + } + ASSERT_TRUE(solv.IsApplicable(fusion_ctx, fusion_problem)); + auto sol = solv.GetSolution(fusion_ctx, fusion_problem); + ASSERT_TRUE(sol.Succeeded()); + ASSERT_TRUE(sol.invoker_factory); + const auto invoker = handle.PrepareInvoker(*sol.invoker_factory, sol.construction_params); + (invoker)(handle, invoke_ctx); + handle.Finish(); +} + +template +void RunTunableSolver(miopen::FusedProblem& problem, + const miopen::AnyInvokeParams& invoke_ctx, + const ConvTestCase& conv_config, + bool& test_skipped) +{ + auto& handle = get_handle(); + Solver solv{}; + const auto plan = problem.AsFusionPlan(); + const auto fusion_problem = miopen::FusionDescription{&plan}; + auto fusion_ctx = miopen::FusionContext{handle}; + if(!solv.IsApplicable(fusion_ctx, fusion_problem)) + { + test_skipped = true; + GTEST_SKIP() << solv.SolverDbId() << " Not Applicable" << conv_config; + } + ASSERT_TRUE(solv.IsApplicable(fusion_ctx, fusion_problem)); + auto sol = solv.GetSolution( + fusion_ctx, fusion_problem, solv.GetDefaultPerformanceConfig(fusion_ctx, fusion_problem)); + ASSERT_TRUE(sol.Succeeded()); + ASSERT_TRUE(sol.invoker_factory); + const auto invoker = handle.PrepareInvoker(*sol.invoker_factory, sol.construction_params); + (invoker)(handle, invoke_ctx); + handle.Finish(); +} + +TEST_P(ConvBiasActivFind2InferTestFloat, ConvBiasActivAsm1x1UFind2Float) +{ + RunTunableSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} +TEST_P(ConvBiasActivFind2InferTestFloat, ConvOclDirectFwdFind2Fused) +{ + RunTunableSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} +TEST_P(ConvBiasActivFind2InferTestFloat, ConvBinWinogradRxSFind2Fused) +{ + RunSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} +TEST_P(ConvBiasActivFind2InferTestFloat, ConvBinWinogradRxSf2x3g1Find2Fused) +{ + RunSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} + +TEST_P(ConvBiasActivFind2InferTestHalf, ConvCKIgemmFwdBiasActivFind2Fused) +{ + RunTunableSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} + +#if MIOPEN_BACKEND_HIP +TEST_P(ConvBiasActivFind2InferTestFloatFusionFind, ConvBiasActivAsm1x1UFind2Float_testFind) +{ + setEnvironmentVariable("MIOPEN_FIND_ENFORCE", "SEARCH_DB_UPDATE"); + setEnvironmentVariable("MIOPEN_DEBUG_TUNING_ITERATIONS_MAX", "5"); + const auto options = miopen::FindOptions{}; + auto solutions = fused_problem.FindSolutions(get_handle(), options, 1); + RunTunableSolver( + fused_problem, invoke_params, conv_config, test_skipped); +} + +INSTANTIATE_TEST_SUITE_P(CBAFind2InferSolverTest, + ConvBiasActivFind2InferTestFloatFusionFind, + testing::Combine(testing::Values(miopenActivationRELU), + testing::ValuesIn(GetNetworkForFusionCompileStepTest()), + testing::Values(miopenTensorNCHW))); + +#endif + +INSTANTIATE_TEST_SUITE_P(CBAFind2InferSolverTest, + ConvBiasActivFind2InferTestFloat, + testing::Combine(testing::Values(miopenActivationRELU), + testing::ValuesIn(GetNetwork1()), + testing::Values(miopenTensorNCHW))); + +INSTANTIATE_TEST_SUITE_P(CBAFind2InferSolverTest, + ConvBiasActivFind2InferTestHalf, + testing::Combine(testing::Values(miopenActivationRELU), + testing::ValuesIn(GetNetwork1()), + testing::Values(miopenTensorNHWC))); From 958cd7993737c9307dd38f62a0d26385a3819250 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Thu, 9 Nov 2023 14:20:15 +0100 Subject: [PATCH 04/12] Changed find test in test_cba_find2_infer to actually test solutions produced by find rather than be marked as skipped due to arbitrary solver inapplicability --- test/gtest/cba_find2.hpp | 37 +++++++++++++++++++++++++--------- test/gtest/cba_find2_infer.cpp | 21 ++++++++++++++----- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/test/gtest/cba_find2.hpp b/test/gtest/cba_find2.hpp index 0fe00cd070..0e905b2904 100644 --- a/test/gtest/cba_find2.hpp +++ b/test/gtest/cba_find2.hpp @@ -111,22 +111,22 @@ struct ConvBiasActivInferFind2Test void TearDown() override { - if(test_skipped) + if(test_skipped || checks_ran) return; - conv_stats stats; - cfsb::TearDownConv(); - // cpu_bias_forward(cfsb::ref_out, bias); + ValidateResult(); + } - activationHostInfer(activ_mode, - activ_gamma, - activ_beta, - activ_alpha, - cfsb::ref_out.data, - cfsb::ref_out.data); + void ValidateResult() + { + checks_ran = true; + CalculateCPUValuesIfNeeded(); cfsb::ThresholdChecks(); } private: + bool cpu_values_calculated = false; + bool checks_ran = false; + [[nodiscard]] miopen::Problem MakeConvProblem() const { auto problem = miopen::Problem{}; @@ -167,4 +167,21 @@ struct ConvBiasActivInferFind2Test }, params); } + + void CalculateCPUValuesIfNeeded() + { + if(cpu_values_calculated) + return; + + cpu_values_calculated = true; + cfsb::TearDownConv(); + // cpu_bias_forward(cfsb::ref_out, bias); + + activationHostInfer(activ_mode, + activ_gamma, + activ_beta, + activ_alpha, + cfsb::ref_out.data, + cfsb::ref_out.data); + } }; diff --git a/test/gtest/cba_find2_infer.cpp b/test/gtest/cba_find2_infer.cpp index 817277f8a2..883af6dcc4 100644 --- a/test/gtest/cba_find2_infer.cpp +++ b/test/gtest/cba_find2_infer.cpp @@ -142,14 +142,25 @@ TEST_P(ConvBiasActivFind2InferTestHalf, ConvCKIgemmFwdBiasActivFind2Fused) } #if MIOPEN_BACKEND_HIP -TEST_P(ConvBiasActivFind2InferTestFloatFusionFind, ConvBiasActivAsm1x1UFind2Float_testFind) +TEST_P(ConvBiasActivFind2InferTestFloatFusionFind, ConvBiasActivFind2Float_testFind) { setEnvironmentVariable("MIOPEN_FIND_ENFORCE", "SEARCH_DB_UPDATE"); setEnvironmentVariable("MIOPEN_DEBUG_TUNING_ITERATIONS_MAX", "5"); - const auto options = miopen::FindOptions{}; - auto solutions = fused_problem.FindSolutions(get_handle(), options, 1); - RunTunableSolver( - fused_problem, invoke_params, conv_config, test_skipped); + std::vector solutions; + + ASSERT_NO_THROW(solutions = fused_problem.FindSolutions(get_handle(), {}, 10)); + + auto tensors = std::unordered_map{ + {miopenTensorConvolutionX, in_dev.get()}, + {miopenTensorConvolutionW, wei_dev.get()}, + {miopenTensorActivationY, out_dev.get()}, + }; + + for(auto& solution : solutions) + { + ASSERT_NO_THROW(solution.Run(get_handle(), tensors, nullptr, 0)); + ValidateResult(); + } } INSTANTIATE_TEST_SUITE_P(CBAFind2InferSolverTest, From fcbe2dc80113ae290eb7f3a7c07bd4b1231c7330 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Mon, 13 Nov 2023 13:28:59 +0100 Subject: [PATCH 05/12] Fixed driver build Removed some development exceptions Added a couple of todo comments --- src/include/miopen/problem.hpp | 6 ++++-- src/problem.cpp | 2 +- src/solution.cpp | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index 2156d88df7..9b770bbc66 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -199,8 +199,10 @@ struct ProblemContainer : miopenProblem boost::variant item; ProblemContainer() = default; - ProblemContainer(boost::variant item_) - : item(std::move(item_)) {} // NOLINT(*-explicit-constructor) + ProblemContainer(boost::variant item_) // NOLINT(*-explicit-constructor) + : item(std::move(item_)) + { + } friend void to_json(nlohmann::json& j, const ProblemContainer& problem); friend void from_json(const nlohmann::json& j, ProblemContainer& problem); diff --git a/src/problem.cpp b/src/problem.cpp index d614811244..6f95db87ae 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -413,7 +413,7 @@ std::vector Problem::FindSolutionsImpl(Handle& handle, solution.SetSolver(handle.GetFound1_0SolverId(netcfg, AlgorithmName{algo}).value()); solution.SetPerfConfig( solution.GetSolver().GetSolver().GetPerfCfgParams(conv_ctx, conv_problem, db)); - solution.SetProblem(*this); + solution.SetProblem({*this}); MIOPEN_LOG_I("Found solution: " << solution.GetSolver().ToString() << " , " << solution.GetWorkspaceSize() << ", " << solution.GetTime()); diff --git a/src/solution.cpp b/src/solution.cpp index 3288e90c72..a7cee4102b 100644 --- a/src/solution.cpp +++ b/src/solution.cpp @@ -100,7 +100,9 @@ void Solution::LogDriverCommand(const ConvolutionDescriptor& desc) const void Solution::LogDriverCommand(const ActivationDescriptor& desc) const { std::ignore = desc; - MIOPEN_THROW(miopenStatusNotImplemented); + boost::get(problem.item).LogDriverCommand(); + /// \todo: when possible, add some command for reproducing a specific case rather than the whole + /// problem } void Solution::LogDriverCommand(const Problem& problem_) const @@ -112,7 +114,7 @@ void Solution::LogDriverCommand(const Problem& problem_) const void Solution::LogDriverCommand(const FusedProblem& problem_) const { std::ignore = problem_; - MIOPEN_THROW(miopenStatusNotImplemented); + /// \todo: add logging of some command to reproduce current solution or at least problem } void Solution::RunImpl(Handle& handle, @@ -211,7 +213,7 @@ void Solution::RunImpl(Handle& handle, auto conv_ctx = ExecutionContext{&handle}; conv_problem.SetupFloats(conv_ctx); - decltype(auto) db = GetDb(ctx); + decltype(auto) db = GetDb(conv_ctx); const auto conv_solution = GetSolver().GetSolver().FindSolution( conv_ctx, conv_problem, db, invoke_ctx, perf_cfg.value_or("")); decltype(auto) invoker = From 3dda4ee3efae7da38c67c6290e1c862298a8fc48 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Mon, 13 Nov 2023 15:59:54 +0100 Subject: [PATCH 06/12] Fixed tidy warning --- src/problem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/problem.cpp b/src/problem.cpp index 6f95db87ae..ca92a12595 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -692,7 +692,7 @@ fusion::FusionInvokeParams FusedProblem::MakeInvokeParams( problem.operator_descriptor); } - return fusion::FusionInvokeParams(operator_args, in_desc, in, out_desc, out, gfx90aaltimpl); + return {operator_args, in_desc, in, out_desc, out, gfx90aaltimpl}; } FusionPlanDescriptor FusedProblem::AsFusionPlan() const From fbd2bd0bcb904e669d27b5bf61d9c545d3f8c408 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Mon, 13 Nov 2023 20:37:51 +0100 Subject: [PATCH 07/12] Fixed missing serialization --- src/include/miopen/problem.hpp | 5 +++-- src/problem.cpp | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/include/miopen/problem.hpp b/src/include/miopen/problem.hpp index 9b770bbc66..6f66a5f9db 100644 --- a/src/include/miopen/problem.hpp +++ b/src/include/miopen/problem.hpp @@ -196,10 +196,11 @@ struct FusedProblem struct ProblemContainer : miopenProblem { - boost::variant item; + using Item = boost::variant; + Item item; ProblemContainer() = default; - ProblemContainer(boost::variant item_) // NOLINT(*-explicit-constructor) + ProblemContainer(Item item_) // NOLINT(*-explicit-constructor) : item(std::move(item_)) { } diff --git a/src/problem.cpp b/src/problem.cpp index ca92a12595..1337e07fb2 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -518,6 +518,35 @@ void from_json(const nlohmann::json& json, FusedProblem& problem) json.at("problems").get_to(problem.problems); } +void to_json(nlohmann::json& json, const ProblemContainer& problem) +{ + json = nlohmann::json{ + {"problem_type", problem.item.which()}, + }; + + auto operator_serialization = [&](auto&& op) { json["value"] = op; }; + boost::apply_visitor(operator_serialization, problem.item); +} + +namespace detail { +template +struct ProblemDeserializer +{ + const nlohmann::json* json; + ProblemContainer::Item* problem; + + void operator()() const { *problem = json->get(); } +}; +} // namespace detail + +void from_json(const nlohmann::json& json, ProblemContainer& problem) +{ + const auto type = json.at("problem_type").get(); + auto value = json.at("value"); + + VisitType(type, &value, &problem.item); +} + void Problem::CalculateOutput() { if(!HasInput()) From 0f0672f466c5dfc2060027da0f4c6112f9e881a6 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Fri, 17 Nov 2023 15:35:14 +0100 Subject: [PATCH 08/12] Apply suggestions from code review Co-authored-by: JD --- test/gtest/cba_find2.hpp | 13 ------------- test/gtest/cba_find2_infer.cpp | 2 +- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/test/gtest/cba_find2.hpp b/test/gtest/cba_find2.hpp index 0e905b2904..40627c68a3 100644 --- a/test/gtest/cba_find2.hpp +++ b/test/gtest/cba_find2.hpp @@ -94,19 +94,6 @@ struct ConvBiasActivInferFind2Test fused_problem.PropagateDescriptors(); ASSERT_NO_THROW(invoke_params = MakeInvokeParams()); - /* - fusePlanDesc = miopen::FusionPlanDescriptor(miopenVerticalFusion, cfsb::input.desc); - auto convOp = - std::make_shared(cfsb::conv_desc, cfsb::weights.desc); - auto biasOp = std::make_shared(bias.desc); - auto activOp = std::make_shared(activ_desc.GetMode()); - EXPECT_EQ(fusePlanDesc.AddOp(convOp), miopenStatusSuccess); - convOp->SetArgs(params, &alpha, &beta, cfsb::wei_dev.get()); - EXPECT_EQ(fusePlanDesc.AddOp(biasOp), miopenStatusSuccess); - biasOp->SetArgs(params, &alpha, &beta, bias_dev.get()); - EXPECT_EQ(fusePlanDesc.AddOp(activOp), miopenStatusSuccess); - activOp->SetArgs(params, &alpha, &beta, activ_alpha, activ_beta, activ_gamma); - */ } void TearDown() override diff --git a/test/gtest/cba_find2_infer.cpp b/test/gtest/cba_find2_infer.cpp index 883af6dcc4..c2372b895b 100644 --- a/test/gtest/cba_find2_infer.cpp +++ b/test/gtest/cba_find2_infer.cpp @@ -72,7 +72,7 @@ void RunSolver(miopen::FusedProblem& problem, auto& handle = get_handle(); Solver solv{}; const auto plan = problem.AsFusionPlan(); - const auto fusion_problem = miopen::FusionDescription{&plan}; + const auto fusion_desc = miopen::FusionDescription{&plan}; auto fusion_ctx = miopen::FusionContext{handle}; if(!solv.IsApplicable(fusion_ctx, fusion_problem)) { From e88f3937a5cf5fe680a27e3be7cbf09f9d76d62e Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Fri, 17 Nov 2023 16:10:14 +0100 Subject: [PATCH 09/12] Removed skipping validation --- test/gtest/cba_find2.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/gtest/cba_find2.hpp b/test/gtest/cba_find2.hpp index 40627c68a3..1b1159336a 100644 --- a/test/gtest/cba_find2.hpp +++ b/test/gtest/cba_find2.hpp @@ -98,21 +98,19 @@ struct ConvBiasActivInferFind2Test void TearDown() override { - if(test_skipped || checks_ran) + if(test_skipped) return; ValidateResult(); } void ValidateResult() { - checks_ran = true; CalculateCPUValuesIfNeeded(); cfsb::ThresholdChecks(); } private: bool cpu_values_calculated = false; - bool checks_ran = false; [[nodiscard]] miopen::Problem MakeConvProblem() const { From 1b0156f67983183063430eec52026e8e1527f20a Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Fri, 17 Nov 2023 22:28:12 +0100 Subject: [PATCH 10/12] Removed env variables from the test --- src/conv/solver_finders.cpp | 25 ++++++++---- src/fusion.cpp | 29 +++++++++----- src/generic_search.cpp | 16 ++++++++ src/include/miopen/conv/solver_finders.hpp | 23 +++++++---- src/include/miopen/find_controls.hpp | 1 + src/include/miopen/find_solution.hpp | 23 +++++++---- src/include/miopen/fusion_plan.hpp | 5 ++- src/include/miopen/generic_search.hpp | 11 ++++++ src/include/miopen/search_options.hpp | 2 + src/problem.cpp | 2 +- test/gtest/cba_find2.hpp | 1 - test/gtest/cba_find2_infer.cpp | 45 ++++++++-------------- 12 files changed, 120 insertions(+), 63 deletions(-) diff --git a/src/conv/solver_finders.cpp b/src/conv/solver_finders.cpp index c998fb75a4..bb7e097f88 100644 --- a/src/conv/solver_finders.cpp +++ b/src/conv/solver_finders.cpp @@ -64,8 +64,10 @@ class DirectSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const ConvFindParameters&) const override + const ConvFindParameters&, + const std::optional&) const override { + /// \todo: actually use FindOptions return problem.GetDirection() != conv::Direction::BackwardWeights ? FindAllDirectSolutions(ctx, problem, invoke_ctx) : FindAllBwdWrW2DSolutions(ctx, problem, invoke_ctx); @@ -91,8 +93,10 @@ class ImplicitGemmSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const ConvFindParameters&) const override + const ConvFindParameters&, + const std::optional&) const override { + /// \todo: actually use FindOptions return problem.GetDirection() != conv::Direction::BackwardWeights ? FindAllImplicitGemmSolutions(ctx, problem, invoke_ctx) : FindImplicitGemmWrWAllSolutions(ctx, problem, invoke_ctx); @@ -120,8 +124,10 @@ class FftSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const ConvFindParameters&) const override + const ConvFindParameters&, + const std::optional&) const override { + /// \todo: actually use FindOptions return FindAllFFTSolutions(ctx, problem, invoke_ctx); } }; @@ -145,8 +151,10 @@ class GemmSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const ConvFindParameters&) const override + const ConvFindParameters&, + const std::optional&) const override { + /// \todo: actually use FindOptions return FindAllGemmSolutions(ctx, problem, invoke_ctx); } }; @@ -170,8 +178,10 @@ class WinogradSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const ConvFindParameters& parameters) const override + const ConvFindParameters& parameters, + const std::optional&) const override { + /// \todo: actually use FindOptions auto ctx_copy = ctx; if(parameters.use_winograd_only) ctx_copy.use_dynamic_solutions_only = true; @@ -283,7 +293,8 @@ void FindCore(const AnyInvokeParams& invoke_ctx, const ExecutionContext& ctx, const ProblemDescriptionBase& problem, const PrimitiveFindParameters& parameters, - const std::vector>& finders) + const std::vector>& finders, + const std::optional& options) { auto& handle = ctx.GetStream(); @@ -292,7 +303,7 @@ void FindCore(const AnyInvokeParams& invoke_ctx, std::transform( finders.begin(), finders.end(), std::inserter(solutions, solutions.end()), [&](auto&& f) { return std::make_pair(f->GetAlgorithmName(problem), - f->Find(ctx, problem, invoke_ctx, parameters)); + f->Find(ctx, problem, invoke_ctx, parameters, options)); }); // Precompile diff --git a/src/fusion.cpp b/src/fusion.cpp index 418c3ff995..ca6fa0360e 100644 --- a/src/fusion.cpp +++ b/src/fusion.cpp @@ -751,13 +751,19 @@ class FusionSolverFinder : public SolversFinderMixin FindImpl(const ExecutionContext& ctx, - const FusionDescription& problem, - const AnyInvokeParams& invoke_ctx, - const FusionFindParameters&) const override + std::vector + FindImpl(const ExecutionContext& ctx, + const FusionDescription& problem, + const AnyInvokeParams& invoke_ctx, + const FusionFindParameters&, + const std::optional& options) const override { - return solvers.SearchForAllSolutions( - dynamic_cast(ctx), problem, miopen::GetDb(ctx), invoke_ctx); + return solvers.SearchForAllSolutions(dynamic_cast(ctx), + problem, + miopen::GetDb(ctx), + invoke_ctx, + std::numeric_limits::max(), + options); } private: @@ -785,7 +791,8 @@ static const std::vector>& GetFusionSolverFinder static std::vector FindFusion(const ExecutionContext& ctx, const FusionDescription& fusion_problem, - const std::function& invoke_params) + const std::function& invoke_params, + const std::optional& options = std::nullopt) { return UserFindDbRecord::TryLoad( ctx.GetStream(), @@ -800,7 +807,8 @@ FindFusion(const ExecutionContext& ctx, fusion_ctx, fusion_problem, FusionFindParameters{}, - GetFusionSolverFinders()); + GetFusionSolverFinders(), + options); }, "fusion"); } @@ -847,9 +855,10 @@ miopenStatus_t FusionPlanDescriptor::Compile(Handle& handle) std::vector FusionPlanDescriptor::Find(Handle& handle, - const std::function& invoke_params) const + const std::function& invoke_params, + const std::optional& options) const { - return FindFusion(&handle, this, invoke_params); + return FindFusion(&handle, this, invoke_params, options); } miopenStatus_t FusionPlanDescriptor::Execute(const Handle& handle, diff --git a/src/generic_search.cpp b/src/generic_search.cpp index f7647b8f95..484070894c 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -33,9 +33,25 @@ namespace miopen { namespace solver { +namespace debug { +static std::optional tuning_iterations_limit; + +TuningIterationScopedLimiter::TuningIterationScopedLimiter(std::size_t new_limit) + : old_limit(tuning_iterations_limit) +{ + tuning_iterations_limit = new_limit; +} + +TuningIterationScopedLimiter::~TuningIterationScopedLimiter() +{ + tuning_iterations_limit = old_limit; +} +} // namespace debug std::size_t GetTuningIterationsMax() { + if(debug::tuning_iterations_limit) + return *debug::tuning_iterations_limit; return Value(MIOPEN_DEBUG_TUNING_ITERATIONS_MAX{}, std::numeric_limits::max()); } diff --git a/src/include/miopen/conv/solver_finders.hpp b/src/include/miopen/conv/solver_finders.hpp index 05d7c13b62..e24b40c7b5 100644 --- a/src/include/miopen/conv/solver_finders.hpp +++ b/src/include/miopen/conv/solver_finders.hpp @@ -27,9 +27,10 @@ #pragma once #include +#include #include #include -#include +#include #include #include @@ -58,7 +59,8 @@ class ISolversFinder Find(const ExecutionContext& ctx, const ProblemDescriptionBase& problem, const AnyInvokeParams& invoke_ctx, - const PrimitiveFindParameters& parameters) const + const PrimitiveFindParameters& parameters, + const std::optional& find_options) const { if(!IsEnabled(ctx, problem, parameters)) { @@ -69,7 +71,7 @@ class ISolversFinder try { MIOPEN_LOG_I2("Starting find for " << GetAlgorithmName(problem).ToString()); - return FindImpl(ctx, problem, invoke_ctx, parameters); + return FindImpl(ctx, problem, invoke_ctx, parameters, find_options); } catch(Exception& ex) { @@ -86,7 +88,8 @@ class ISolversFinder FindImpl(const ExecutionContext& ctx, const ProblemDescriptionBase& problem, const AnyInvokeParams& invoke_ctx, - const PrimitiveFindParameters& parameters) const = 0; + const PrimitiveFindParameters& parameters, + const std::optional& options) const = 0; }; template @@ -105,12 +108,14 @@ class SolversFinderMixin : public ISolversFinder FindImpl(const ExecutionContext& ctx, const ProblemDescriptionBase& problem, const AnyInvokeParams& invoke_ctx, - const PrimitiveFindParameters& parameters) const final + const PrimitiveFindParameters& parameters, + const std::optional& options) const final { return FindImpl(ctx, static_cast(problem), invoke_ctx, - static_cast(parameters)); + static_cast(parameters), + options); } [[nodiscard]] bool IsEnabled(const ExecutionContext& ctx, @@ -130,7 +135,8 @@ class SolversFinderMixin : public ISolversFinder FindImpl(const ExecutionContext& ctx, const ProblemDescription& problem, const AnyInvokeParams& invoke_ctx, - const FindParameters& parameters) const = 0; + const FindParameters& parameters, + const std::optional& options) const = 0; [[nodiscard]] virtual bool IsEnabled(const ExecutionContext& ctx, const ProblemDescription& problem, @@ -148,7 +154,8 @@ void FindCore(const AnyInvokeParams& invoke_ctx, const ExecutionContext& ctx, const ProblemDescriptionBase& problem, const PrimitiveFindParameters& parameters, - const std::vector>& finders); + const std::vector>& finders, + const std::optional& options = std::nullopt); namespace conv { diff --git a/src/include/miopen/find_controls.hpp b/src/include/miopen/find_controls.hpp index 5101321412..65bb440c0d 100644 --- a/src/include/miopen/find_controls.hpp +++ b/src/include/miopen/find_controls.hpp @@ -72,6 +72,7 @@ class FindEnforce public: FindEnforce(); + explicit FindEnforce(FindEnforceAction action_) : action(action_) {} template bool IsDbClean(const Context& context) const diff --git a/src/include/miopen/find_solution.hpp b/src/include/miopen/find_solution.hpp index 519f1de608..4943ecaaa7 100644 --- a/src/include/miopen/find_solution.hpp +++ b/src/include/miopen/find_solution.hpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -51,10 +52,12 @@ auto FindSolutionImpl(rank<1>, const Problem& problem, Db& db, const AnyInvokeParams& invoke_ctx, - const std::string& perf_cfg) + const std::string& perf_cfg, + const std::optional& options) -> decltype(s.GetSolution(context, problem, s.Search(context, problem, invoke_ctx))) { - const FindEnforce enforce; + const FindEnforce enforce = + options && options->find_enforce ? *options->find_enforce : FindEnforce{}; if(context.disable_perfdb_access) { MIOPEN_LOG_I(s.SolverDbId() << " (db access disabled)"); @@ -142,7 +145,9 @@ auto FindSolutionImpl(rank<0>, const Problem& problem, Db&, const AnyInvokeParams&, - const std::string&) -> decltype(s.GetSolution(context, problem)) + const std::string&, + const std::optional&) + -> decltype(s.GetSolution(context, problem)) { MIOPEN_LOG_I(s.SolverDbId() << " (not searchable)"); return s.GetSolution(context, problem); @@ -160,12 +165,14 @@ ConvSolution FindSolution(Solver s, const Problem& problem, Db& db, const AnyInvokeParams& invoke_ctx, - const std::string& perf_cfg = "") + const std::string& perf_cfg = "", + const std::optional& options = std::nullopt) { static_assert(sizeof(Solver) == sizeof(SolverBase), "Solver must be stateless"); static_assert(std::is_base_of{}, "Not derived class of SolverBase"); // TODO: This assumes all solutions are ConvSolution - auto solution = FindSolutionImpl(rank<1>{}, s, context, problem, db, invoke_ctx, perf_cfg); + auto solution = + FindSolutionImpl(rank<1>{}, s, context, problem, db, invoke_ctx, perf_cfg, options); solution.solver_id = s.SolverDbId(); return solution; } @@ -203,7 +210,8 @@ struct SolverContainer const Problem& problem, Db&& db, const AnyInvokeParams& invoke_ctx, - std::size_t limit = std::numeric_limits::max()) const + std::size_t limit = std::numeric_limits::max(), + const std::optional& options = std::nullopt) const { std::vector ss; std::size_t count = 0; @@ -229,7 +237,8 @@ struct SolverContainer } else { - const Solution s = FindSolution(solver, ctx, problem, db, invoke_ctx); + const Solution s = + FindSolution(solver, ctx, problem, db, invoke_ctx, "", options); if(s.Succeeded()) { ++count; diff --git a/src/include/miopen/fusion_plan.hpp b/src/include/miopen/fusion_plan.hpp index f72a198580..e78cdbeda5 100644 --- a/src/include/miopen/fusion_plan.hpp +++ b/src/include/miopen/fusion_plan.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -58,7 +59,9 @@ struct FusionPlanDescriptor : miopenFusionPlanDescriptor const OperatorArgs& op_args); miopenStatus_t Compile(Handle& handle); std::vector - Find(Handle& handle, const std::function& invoke_params) const; + Find(Handle& handle, + const std::function& invoke_params, + const std::optional& options = std::nullopt) const; friend std::ostream& operator<<(std::ostream& stream, const FusionPlanDescriptor& fpd); miopenStatus_t diff --git a/src/include/miopen/generic_search.hpp b/src/include/miopen/generic_search.hpp index 71b34b908a..f718b5389e 100644 --- a/src/include/miopen/generic_search.hpp +++ b/src/include/miopen/generic_search.hpp @@ -52,6 +52,17 @@ namespace miopen { namespace solver { +namespace debug { +struct TuningIterationScopedLimiter +{ + TuningIterationScopedLimiter(std::size_t new_limit); + ~TuningIterationScopedLimiter(); + +private: + std::optional old_limit; +}; +} // namespace debug + /// This STL-like container together with corresponding iterator provide access /// to a set of all available performance configs for the given problem config. /// diff --git a/src/include/miopen/search_options.hpp b/src/include/miopen/search_options.hpp index 0c01dae944..c2feef5eb0 100644 --- a/src/include/miopen/search_options.hpp +++ b/src/include/miopen/search_options.hpp @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -50,6 +51,7 @@ struct FindOptions : miopenFindOptions std::size_t workspace_limit = std::numeric_limits::max(); std::unordered_map preallocated_tensors; std::optional preallocated_workspace; + std::optional find_enforce; }; } // namespace miopen diff --git a/src/problem.cpp b/src/problem.cpp index 1337e07fb2..8664c63ae1 100644 --- a/src/problem.cpp +++ b/src/problem.cpp @@ -617,7 +617,7 @@ std::vector FusedProblem::FindSolutions(Handle& handle, return MakeInvokeParams(buffer_allocator, params); }; - return AsFusionPlan().Find(handle, make_invoke_params); + return AsFusionPlan().Find(handle, make_invoke_params, options); }(); auto ret = std::vector{}; diff --git a/test/gtest/cba_find2.hpp b/test/gtest/cba_find2.hpp index 1b1159336a..2ed673e17b 100644 --- a/test/gtest/cba_find2.hpp +++ b/test/gtest/cba_find2.hpp @@ -93,7 +93,6 @@ struct ConvBiasActivInferFind2Test fused_problem.PropagateDescriptors(); ASSERT_NO_THROW(invoke_params = MakeInvokeParams()); - } void TearDown() override diff --git a/test/gtest/cba_find2_infer.cpp b/test/gtest/cba_find2_infer.cpp index c2372b895b..2057f86ad0 100644 --- a/test/gtest/cba_find2_infer.cpp +++ b/test/gtest/cba_find2_infer.cpp @@ -24,6 +24,7 @@ * *******************************************************************************/ #include +#include #include #include #include @@ -36,7 +37,6 @@ #include "tensor_util.hpp" #include "get_handle.hpp" #include "cba_find2.hpp" -#include "miopen/search_options.hpp" struct ConvBiasActivFind2InferTestFloat : ConvBiasActivInferFind2Test { @@ -50,19 +50,6 @@ struct ConvBiasActivFind2InferTestHalf : ConvBiasActivInferFind2Test solutions; + auto options = miopen::FindOptions{}; + options.find_enforce = miopen::FindEnforce{miopen::FindEnforceAction::SearchDbUpdate}; - ASSERT_NO_THROW(solutions = fused_problem.FindSolutions(get_handle(), {}, 10)); + ASSERT_NO_THROW(solutions = fused_problem.FindSolutions(get_handle(), options, 10)); auto tensors = std::unordered_map{ {miopenTensorConvolutionX, in_dev.get()}, From cdd65001eaabfd090b283c6e8fc29f4bc0dc032d Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Sat, 18 Nov 2023 12:12:29 +0100 Subject: [PATCH 11/12] Fixed tidy warning --- src/generic_search.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/generic_search.cpp b/src/generic_search.cpp index 484070894c..908cd9dc4e 100644 --- a/src/generic_search.cpp +++ b/src/generic_search.cpp @@ -34,6 +34,8 @@ namespace miopen { namespace solver { namespace debug { + +// NOLINTNEXTLINE (cppcoreguidelines-avoid-non-const-global-variables) static std::optional tuning_iterations_limit; TuningIterationScopedLimiter::TuningIterationScopedLimiter(std::size_t new_limit) From 5fe3b829ab93d0e102bcdcc29e520a37b0f35a34 Mon Sep 17 00:00:00 2001 From: Vasilii Filippov Date: Sat, 18 Nov 2023 12:12:50 +0100 Subject: [PATCH 12/12] Added a comment on struct TuningIterationScopedLimiter --- src/include/miopen/generic_search.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/include/miopen/generic_search.hpp b/src/include/miopen/generic_search.hpp index 6a1fc7cfdd..9a68949ce4 100644 --- a/src/include/miopen/generic_search.hpp +++ b/src/include/miopen/generic_search.hpp @@ -53,6 +53,8 @@ namespace miopen { namespace solver { namespace debug { +// This struct is not MT-safe, meaning one should use it before starting threads, thus avoiding +// constructing it inside a worker thread. struct TuningIterationScopedLimiter { TuningIterationScopedLimiter(std::size_t new_limit);