diff --git a/projects/miopen/CHANGELOG.md b/projects/miopen/CHANGELOG.md index 50f9b58f73c..5f7e6fff465 100644 --- a/projects/miopen/CHANGELOG.md +++ b/projects/miopen/CHANGELOG.md @@ -2,12 +2,17 @@ # Change Log for MIOpen Full documentation for MIOpen is available [here](https://rocm.docs.amd.com/projects/MIOpen/en/latest/) +## (Unreleased) MIOpen 3.5.1 for ROCm 7.2.0 +### Optimized +* [Conv] Improve Composable Kernel (CK) kernel selection during tuning. + ## MIOpen 3.5.1 for ROCm 7.1.0 ### Added * Added a new trust verify find mode. * Ported Op4dTensorLite kernel from OpenCL to HIP. * Implemented a generic HIP kernel for backwards layer normalization. +* [BatchNorm] Enabled tuning using `miopenSetTuningPolicy`. ### Changed @@ -16,7 +21,6 @@ Full documentation for MIOpen is available [here](https://rocm.docs.amd.com/proj ### Optimized * [Conv] Enabled Composable Kernel (CK) implicit gemms on gfx950. -* [Conv] Improve Composable Kernel (CK) kernel selection during tuning ### Resolved issues diff --git a/projects/miopen/docs/conceptual/perfdb.rst b/projects/miopen/docs/conceptual/perfdb.rst index 098d003514e..61448fe92a9 100644 --- a/projects/miopen/docs/conceptual/perfdb.rst +++ b/projects/miopen/docs/conceptual/perfdb.rst @@ -51,9 +51,11 @@ In order for auto-tuning to begin, the following conditions must be met: * The value of the ``exhaustiveSearch`` parameter is set to ``true`` * Neither the System nor User PerfDb can contain values for the relevant "problem configuration". -You can override the latter two conditions and force the search using the -``-MIOPEN_FIND_ENFORCE`` environment variable. You can also use this variable to remove values -from User PerfDb, as described in the following section. +You can override the latter two conditions and force the search using either the API call +``miopenSetTuningPolicy()`` or the ``-MIOPEN_FIND_ENFORCE`` environment variable. In addition to +controlling the auto-tuning behaviour of convolutions, both ``miopenSetTuningPolicy()`` and +``-MIOPEN_FIND_ENFORCE`` can be used to control the tuning for batch normalization. +See the following section for more details. To optimize performance, MIOpen provides several find modes to accelerate find API calls. These modes include: @@ -65,7 +67,7 @@ These modes include: For more information about the MIOpen find modes, see :ref:`Find modes `. -Using MIOPEN_FIND_ENFORCE +Using MIOPEN_FIND_ENFORCE or miopenSetTuningPolicy() to control auto-tuning ---------------------------------------------------------------------------------------------------------- ``MIOPEN_FIND_ENFORCE`` supports case-insensitive symbolic and numeric values. The possible values @@ -90,6 +92,20 @@ are: Use the ``DB_CLEAN`` option with care. +Note that the API call miopenSetTuningPolicy() can be used to set the same modes as +``MIOPEN_FIND_ENFORCE``. For example, to set the ``SEARCH`` mode, code like the following could be used: +.. code-block:: c + + miopenSetTuningPolicy(handle, miopenTuningPolicySearch); + miopenBatchNorm*() + miopenSetTuningPolicy(handle, miopenTuningPolicyNone); + +Note that this API method is supported for both convolutions and batchnorms, although batchnorm does +not support a policy of ``DB_UPDATE`` (this will be a no-op and the user should specify ``SEARCH_DB_UPDATE`` +instead if they want ``DB_UPDATE`` behavior). + +If both the API method and environment variable are used, then the API method takes precedence. + Updating MIOpen and User PerfDb ========================================================== diff --git a/projects/miopen/docs/conceptual/tuningdb.rst b/projects/miopen/docs/conceptual/tuningdb.rst index 7c71ce782ab..2633cf26bb1 100644 --- a/projects/miopen/docs/conceptual/tuningdb.rst +++ b/projects/miopen/docs/conceptual/tuningdb.rst @@ -32,6 +32,9 @@ Enable this feature using these commands: export MIOPEN_FIND_ENFORCE=3 export MIOPEN_USER_DB_PATH="/user/specified/directory" +Note that the ``miopenSetTuningPolicy()`` API method can be used instead of the ``MIOPEN_FIND_ENFORCE`` +environment variable. + Exhaustive tuning ---------------------------------------------------------------------------------------------------------- diff --git a/projects/miopen/docs/how-to/find-and-immediate.rst b/projects/miopen/docs/how-to/find-and-immediate.rst index 00510ac42ac..85cbfbf7143 100644 --- a/projects/miopen/docs/how-to/find-and-immediate.rst +++ b/projects/miopen/docs/how-to/find-and-immediate.rst @@ -247,5 +247,5 @@ modes by using the ``MIOPEN_FIND_MODE`` environment variable with one of these v * ``TRUST_VERIFY_FULL``/``7`` (trust verify full find): Checks :doc:`FindDb <../conceptual/finddb>` Same as TRUST_VERIFY, with no limitations on tuning time. -The default find mode is ``TRUST_VERIFY``. To run the full ``NORMAL`` find mode, use +The default find mode is ``DYNAMIC_HYBRID``. To run the full ``NORMAL`` find mode, use ``export MIOPEN_FIND_MODE=NORMAL`` or ``export MIOPEN_FIND_MODE=1``. diff --git a/projects/miopen/driver/bn_driver.hpp b/projects/miopen/driver/bn_driver.hpp index a1b72cf4664..d00d23255cd 100644 --- a/projects/miopen/driver/bn_driver.hpp +++ b/projects/miopen/driver/bn_driver.hpp @@ -177,6 +177,8 @@ class BatchNormDriver : public Driver miopenTensorLayout_t bn_layout; GPUMem::Check buffer_check = GPUMem::Check::None; + + int tuning_policy; }; template @@ -191,6 +193,12 @@ int BatchNormDriver::ParseCmdLineArgs(int buffer_check = GetGpuBufferCheck(inflags); + tuning_policy = inflags.GetValueInt("tuning_policy"); + if(tuning_policy != 0) + { + miopenSetTuningPolicy(GetHandle(), static_cast(tuning_policy)); + } + return miopenStatusSuccess; } @@ -344,6 +352,11 @@ int BatchNormDriver::AddCmdLineArgs() inflags.AddInputFlag( "activ_beta", 'y', "1.0", "Activation function parameter beta (Default=1.0)", "float"); AddGpuBufferCheckFlag(inflags); + inflags.AddInputFlag("tuning_policy", + '&', + "0", + "MIOpen tuning policy (Default=0, or no tuning policy set)", + "int"); return miopenStatusSuccess; } diff --git a/projects/miopen/driver/conv_driver.hpp b/projects/miopen/driver/conv_driver.hpp index 23ecbf93f36..23523c6eb98 100644 --- a/projects/miopen/driver/conv_driver.hpp +++ b/projects/miopen/driver/conv_driver.hpp @@ -406,6 +406,7 @@ class ConvDriver : public Driver bool is_gpualloc = false; bool init_output_nan = false; GPUMem::Check buffer_check = GPUMem::Check::None; + int tuning_policy = 0; int num_iterations = 1; @@ -708,6 +709,12 @@ int ConvDriver::ParseCmdLineArgs(int argc, char* argv[]) buffer_check = GetGpuBufferCheck(inflags); + tuning_policy = inflags.GetValueInt("tuning_policy"); + if(tuning_policy != 0) + { + miopenSetTuningPolicy(GetHandle(), static_cast(tuning_policy)); + } + return 0; } @@ -1010,6 +1017,11 @@ int ConvDriver::AddCmdLineArgs() "wei_cast_type", 'R', "-1", "Cast type for weight tensor, default to not set", "string"); inflags.AddInputFlag( "init_output_nan", 'N', "0", "populate output buffers with nan values (Default=0)", "int"); + inflags.AddInputFlag("tuning_policy", + '&', + "0", + "MIOpen tuning policy (Default=0, or no tuning policy set)", + "int"); return 0; } diff --git a/projects/miopen/include/miopen/miopen.h b/projects/miopen/include/miopen/miopen.h index 676c1bb086b..b7b9214905e 100644 --- a/projects/miopen/include/miopen/miopen.h +++ b/projects/miopen/include/miopen/miopen.h @@ -8518,23 +8518,21 @@ MIOPEN_EXPORT miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle, /*! @ingroup handle * @enum miopenTuningPolicy_t * Tuning policy for MIOpen Find-related calls. - * Supports only the following policies of MIOpenFindEnforce: + * Supports the following policies of MIOPEN_FIND_ENFORCE: * 1. None: Do not enforce anything. - * 2. DbUpdate: Tune and update the database. - * 3. Search: Search the database first; if no record is found, tune but do not update the database. + * 2. DbUpdate: Do not skip auto-tune even if PerfDb already contains optimized values. + * 3. Search: Search the database first; if no record is found, tune and update the database. * 4. SearchDbUpdate: Combination of Search and DbUpdate. * 5. DbClean: Remove existing entry, do not tune. - * Note: MIOpenFindEnforce has additional features that are not supported by TuningPolicy. * Note: TuningPolicy has higher priority over MIOPEN_FIND_ENFORCE. */ typedef enum { - miopenTuningPolicyNone = 1, /* do not enforce anything */ - miopenTuningPolicyDbUpdate = 2, /* tune and update the db */ - miopenTuningPolicySearch = - 3, /* search db first, if record not found tune but do not update the db */ - miopenTuningPolicySearchDbUpdate = 4, /* combination of Search and DbUpdate */ - miopenTuningPolicyDbClean = 5, /* remove existing entry, do not tune */ + miopenTuningPolicyNone = 1, /*!< do not enforce anything */ + miopenTuningPolicyDbUpdate = 2, /*!< tune and update the db */ + miopenTuningPolicySearch = 3, /*!< search db first, if record not found tune but do not update the db */ + miopenTuningPolicySearchDbUpdate = 4, /*!< combination of Search and DbUpdate */ + miopenTuningPolicyDbClean = 5, /*!< remove existing entry, do not tune */ } miopenTuningPolicy_t; /*! @ingroup handle diff --git a/projects/miopen/src/CMakeLists.txt b/projects/miopen/src/CMakeLists.txt index 9fb79147131..a438cd35985 100644 --- a/projects/miopen/src/CMakeLists.txt +++ b/projects/miopen/src/CMakeLists.txt @@ -80,6 +80,7 @@ set( MIOpen_Source adam_api.cpp addlayernorm_api.cpp api/find2_0_commons.cpp + api/tuning.cpp base64.cpp batch_norm.cpp batch_norm_api.cpp @@ -346,7 +347,6 @@ set( MIOpen_Source tensor_api.cpp transformers_adam_w_api.cpp seq_tensor.cpp - api/tuning.cpp ) if(MIOPEN_ENABLE_AI_KERNEL_TUNING OR MIOPEN_ENABLE_AI_IMMED_MODE_FALLBACK) diff --git a/projects/miopen/src/include/miopen/find_controls.hpp b/projects/miopen/src/include/miopen/find_controls.hpp index 64dc340a10c..98ac73a8f0f 100644 --- a/projects/miopen/src/include/miopen/find_controls.hpp +++ b/projects/miopen/src/include/miopen/find_controls.hpp @@ -85,9 +85,8 @@ class MIOPEN_INTERNALS_EXPORT FindEnforce template bool IsSearch(const Context& context) const { - return IsEnabled(context) && (action == FindEnforceAction::Search || - action == FindEnforceAction::SearchDbUpdate || - action == FindEnforceAction::DbUpdate); + return IsEnabled(context) && + (action == FindEnforceAction::Search || action == FindEnforceAction::SearchDbUpdate); } template diff --git a/projects/miopen/test/gtest/smoke_tuning_policy.cpp b/projects/miopen/test/gtest/smoke_tuning_policy.cpp index 3b2c7ace9cc..e4ee6d950f6 100644 --- a/projects/miopen/test/gtest/smoke_tuning_policy.cpp +++ b/projects/miopen/test/gtest/smoke_tuning_policy.cpp @@ -10,12 +10,29 @@ MIOPEN_LIB_ENV_VAR(MIOPEN_USER_DB_PATH) class CPU_TuningPolicy_NONE : public ::testing::Test { +protected: + void testSetInvalidValue(const miopenTuningPolicy_t original_policy, int invalid) + { + auto&& handle = get_handle(); + miopenTuningPolicy_t test_tuning_policy = miopenTuningPolicy_t::miopenTuningPolicyNone; + miopenTuningPolicy_t prev_tuning_policy; + + EXPECT_EQ(miopenSetTuningPolicy(&handle, original_policy), miopenStatusSuccess); + EXPECT_EQ(miopenGetTuningPolicy(&handle, &prev_tuning_policy), miopenStatusSuccess); + EXPECT_EQ(original_policy, prev_tuning_policy); + + EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(invalid)), + miopenStatusBadParm); + EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); + EXPECT_EQ(prev_tuning_policy, test_tuning_policy); + } }; -TEST_F(CPU_TuningPolicy_NONE, TestTuningPolicyGetterAndSetter) +TEST_F(CPU_TuningPolicy_NONE, TestTuningPolicyGetterAndSetterValidValues) { auto&& handle = get_handle(); miopenTuningPolicy_t test_tuning_policy; + EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicyNone); @@ -24,31 +41,35 @@ TEST_F(CPU_TuningPolicy_NONE, TestTuningPolicyGetterAndSetter) EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicyDbUpdate); + EXPECT_EQ(miopenSetTuningPolicy(&handle, miopenTuningPolicy_t::miopenTuningPolicySearch), + miopenStatusSuccess); + EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); + EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicySearch); + EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(4)), miopenStatusSuccess); EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicySearchDbUpdate); - EXPECT_EQ(miopenSetTuningPolicy(&handle, miopenTuningPolicy_t::miopenTuningPolicyNone), + EXPECT_EQ(miopenSetTuningPolicy(&handle, miopenTuningPolicy_t::miopenTuningPolicyDbClean), miopenStatusSuccess); EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); - EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicyNone); - EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(-1)), - miopenStatusBadParm); - - EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(0)), - miopenStatusBadParm); - - EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(6)), - miopenStatusBadParm); - - EXPECT_EQ(miopenSetTuningPolicy(&handle, static_cast(1000)), - miopenStatusBadParm); + EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicyDbClean); + EXPECT_EQ(miopenSetTuningPolicy(&handle, miopenTuningPolicy_t::miopenTuningPolicyNone), + miopenStatusSuccess); EXPECT_EQ(miopenGetTuningPolicy(&handle, &test_tuning_policy), miopenStatusSuccess); EXPECT_EQ(test_tuning_policy, miopenTuningPolicy_t::miopenTuningPolicyNone); } +TEST_F(CPU_TuningPolicy_NONE, TestTuningPolicyGetterAndSetterErrorHandling) +{ + testSetInvalidValue(miopenTuningPolicy_t::miopenTuningPolicyDbUpdate, -1); + testSetInvalidValue(miopenTuningPolicy_t::miopenTuningPolicySearch, 0); + testSetInvalidValue(miopenTuningPolicy_t::miopenTuningPolicySearchDbUpdate, 6); + testSetInvalidValue(miopenTuningPolicy_t::miopenTuningPolicyDbClean, 1000); +} + TEST_F(CPU_TuningPolicy_NONE, TestNullHandleForTuningPolicy) { miopenTuningPolicy_t test_tuning_policy;