Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

further hipBLASLt build speedup #145

Open
lamikr opened this issue Aug 22, 2024 · 0 comments
Open

further hipBLASLt build speedup #145

lamikr opened this issue Aug 22, 2024 · 0 comments

Comments

@lamikr
Copy link
Owner

lamikr commented Aug 22, 2024

Based on @jeroen-mostert's experiment of deleting the yaml files from hipBLASLt to speed up its build
for GPUs which don't need it, I came to the idea that we could, in tensilelite's TensileCreateLibrary.py, programmatically filter
away all yaml files if some build flag is set.

I got it almost working yesterday, but not quite yet. For some reason the CMake flag I set up did not reach tensilelite's TensileCreateLibrary.py, so something is still missing. I am putting the patches here anyway so that they stay safe, and in case somebody else has time to investigate and fix them.

  1. First, the patch to the binfo file, which sets the new build flag if none of the GPUs supported by hipBLASLt was selected by the user.
diff --git a/binfo/core/025_02_hipBLASLt.binfo b/binfo/core/025_02_hipBLASLt.binfo
index 9185164..92a89ae 100755
--- a/binfo/core/025_02_hipBLASLt.binfo
+++ b/binfo/core/025_02_hipBLASLt.binfo
@@ -44,10 +44,12 @@ FILTERED_GPU_LIST=$(func_filter_gfx_array ${SEMICOLON_SEPARATED_GPU_TARGET_LIST_
 
 # build tests only if there are user selected GPUs on hipBLASLt build target list.
 GPU_TARGET_FOUND=ON
+CLEAR_LOGIC_FILE_LIST=OFF
 if [[ -z ${FILTERED_GPU_LIST} ]]; then
     # if support for none of the gpu's is build
     # then does not make sense to build benchmarks or other examples
     GPU_TARGET_FOUND=OFF
+    CLEAR_LOGIC_FILE_LIST=ON
     # we need to build the support for at least one gpu. Othetwise the build would fail.
     FILTERED_GPU_LIST="gfx90a"
 fi
@@ -70,6 +72,7 @@ BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTensile_MERGE_FILES=ON"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTensile_LIBRARY_FORMAT=msgpack"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTensile_CPU_THREADS=${BUILD_CPU_COUNT_SAFE}"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTensile_TEST_LOCAL_PATH=${BINFO_APP_SRC_DIR}/tensilelite"
+BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTensile_CLEAR_LOGIC_FILE_LIST=${CLEAR_LOGIC_FILE_LIST}"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DTENSILE_VERSION=4.33.0"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DRUN_HEADER_TESTING=OFF"
 BINFO_APP_CMAKE_CFG="${BINFO_APP_CMAKE_CFG} -DCMAKE_BUILD_TYPE=Release"

  2. Then the second patch, to hipBLASLt itself, which receives the build flag and, if it is set, leaves only one yaml file in the logicFiles array.
    (I think the build will fail if the list is empty, without any logic files.)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b9d2fdb..63b13b36 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -181,6 +181,7 @@ else()
     set( Tensile_CODE_OBJECT_VERSION "default" CACHE STRING "Tensile code_object_version")
     set( Tensile_COMPILER "hipcc" CACHE STRING "Tensile compiler")
     set( Tensile_LIBRARY_FORMAT "msgpack" CACHE STRING "Tensile library format")
+    set( Tensile_CLEAR_LOGIC_FILE_LIST "OFF" CACHE STRING "Clear the list of logic files found")
     set( Tensile_CPU_THREADS "" CACHE STRING "Number of threads for Tensile parallel build")
 
     option( Tensile_MERGE_FILES "Tensile to merge kernels and solutions files?" ON )
@@ -193,6 +194,7 @@ else()
     set_property( CACHE Tensile_CODE_OBJECT_VERSION PROPERTY STRINGS default V4 V5 )
     set_property( CACHE Tensile_COMPILER PROPERTY STRINGS hcc hipcc)
     set_property( CACHE Tensile_LIBRARY_FORMAT PROPERTY STRINGS msgpack yaml)
+    set_property( CACHE Tensile_CLEAR_LOGIC_FILE_LIST PROPERTY STRINGS ON OFF)
 
     if(Tensile_LIBRARY_FORMAT MATCHES "yaml")
       option(TENSILE_USE_LLVM      "Use LLVM for parsing config files." ON)
diff --git a/tensilelite/Tensile/ClientExecutable.py b/tensilelite/Tensile/ClientExecutable.py
index a86ee521..5e3f01ab 100644
--- a/tensilelite/Tensile/ClientExecutable.py
+++ b/tensilelite/Tensile/ClientExecutable.py
@@ -64,6 +64,7 @@ def clientExecutableEnvironment(builddir=None):
                'TENSILE_USE_MSGPACK': 'ON',
                'TENSILE_USE_LLVM': 'ON',
                'Tensile_LIBRARY_FORMAT': globalParameters["LibraryFormat"],
+               'Tensile_CLEAR_LOGIC_FILE_LIST': globalParameters["ClearLogicFileList"],
                'CMAKE_CXX_COMPILER': os.path.join(globalParameters["ROCmBinPath"], globalParameters['CxxCompiler'])}
 
     return CMakeEnvironment(sourcedir, builddir, **options)
diff --git a/tensilelite/Tensile/ClientWriter.py b/tensilelite/Tensile/ClientWriter.py
index e8ef18b1..8a3991f6 100644
--- a/tensilelite/Tensile/ClientWriter.py
+++ b/tensilelite/Tensile/ClientWriter.py
@@ -231,6 +231,7 @@ def getBuildClientLibraryScript(buildPath, libraryLogicPath):
   callCreateLibraryCmd += " --code-object-version=" + globalParameters["CodeObjectVersion"]
   callCreateLibraryCmd += " --cxx-compiler=" + globalParameters["CxxCompiler"]
   callCreateLibraryCmd += " --library-format=" + globalParameters["LibraryFormat"]
+  callCreateLibraryCmd += " --clear-logic-file-list=" + globalParameters["ClearLogicFileList"]
 
   callCreateLibraryCmd += " %s" % libraryLogicPath
   callCreateLibraryCmd += " %s" % buildPath #" ../source"
diff --git a/tensilelite/Tensile/Source/TensileCreateLibrary.cmake b/tensilelite/Tensile/Source/TensileCreateLibrary.cmake
index 265b0ac1..98ad815d 100644
--- a/tensilelite/Tensile/Source/TensileCreateLibrary.cmake
+++ b/tensilelite/Tensile/Source/TensileCreateLibrary.cmake
@@ -39,7 +39,8 @@ function(TensileCreateLibraryCmake
     Tensile_LIBRARY_PRINT_DEBUG
     Tensile_CPU_THREADS
     Tensile_SEPARATE_ARCHITECTURES
-    Tensile_LAZY_LIBRARY_LOADING)
+    Tensile_LAZY_LIBRARY_LOADING
+    Tensile_CLEAR_LOGIC_FILE_LIST)
 
 # make Tensile_PACKAGE_LIBRARY and optional parameter
 # to avoid breaking applications which us this
@@ -57,6 +58,7 @@ function(TensileCreateLibraryCmake
   message(STATUS "Tensile_COMPILER            from TensileCreateLibraryCmake : ${Tensile_COMPILER}")
   message(STATUS "Tensile_ARCHITECTURE        from TensileCreateLibraryCmake : ${Tensile_ARCHITECTURE}")
   message(STATUS "Tensile_LIBRARY_FORMAT      from TensileCreateLibraryCmake : ${Tensile_LIBRARY_FORMAT}")
+  message(STATUS "Tensile_CLEAR_LOGIC_FILE_LIST                              : ${Tensile_CLEAR_LOGIC_FILE_LIST}")
   message(STATUS "Tensile_CPU_THREADS         from TensileCreateLibraryCmake : ${Tensile_CPU_THREADS}")
 
   #execute_process(COMMAND chmod 755 ${Tensile_ROOT}/bin/TensileCreateLibrary)
@@ -104,6 +106,7 @@ function(TensileCreateLibraryCmake
   set(Tensile_CREATE_COMMAND ${Tensile_CREATE_COMMAND} "--code-object-version=${Tensile_CODE_OBJECT_VERSION}")
   set(Tensile_CREATE_COMMAND ${Tensile_CREATE_COMMAND} "--cxx-compiler=${Tensile_COMPILER}")
   set(Tensile_CREATE_COMMAND ${Tensile_CREATE_COMMAND} "--library-format=${Tensile_LIBRARY_FORMAT}")
+  set(Tensile_CREATE_COMMAND ${Tensile_CREATE_COMMAND} "--clear-logic-file-list=${Tensile_CLEAR_LOGIC_FILE_LIST}")
   set(Tensile_CREATE_COMMAND ${Tensile_CREATE_COMMAND} "--jobs=${Tensile_CPU_THREADS}")
 
   # TensileLibraryWriter positional arguments
diff --git a/tensilelite/Tensile/TensileCreateLibrary.py b/tensilelite/Tensile/TensileCreateLibrary.py
index d497f05b..db2777b8 100644
--- a/tensilelite/Tensile/TensileCreateLibrary.py
+++ b/tensilelite/Tensile/TensileCreateLibrary.py
@@ -1261,6 +1261,8 @@ def TensileCreateLibrary():
                           default=False, help="Output manifest file with list of expected library objects and exit.")
   argParser.add_argument("--library-format", dest="LibraryFormat", choices=["yaml", "msgpack"],
                          action="store", default="msgpack", help="select which library format to use")
+  argParser.add_argument("--clear-logic-file-list", dest="ClearLogicFileList", choices=["ON", "OFF"],
+                         action="store", default="OFF", help="select whether to clear the found list of logic files")
   argParser.add_argument("--generate-sources-and-exit",   dest="GenerateSourcesAndExit", action="store_true",
                           default=False, help="Output source files only and exit.")
   argParser.add_argument("--jobs", "-j", dest="CpuThreads", type=int,
@@ -1308,6 +1310,7 @@ def TensileCreateLibrary():
   arguments["CodeFromFiles"] = False
   arguments["EmbedLibrary"] = args.EmbedLibrary
   arguments["LibraryFormat"] = args.LibraryFormat
+  arguments["ClearLogicFileList"] = args.ClearLogicFileList
   if args.no_enumerate:
     arguments["ROCmAgentEnumeratorPath"] = False
   arguments["PackageLibrary"] = args.PackageLibrary
@@ -1356,7 +1359,8 @@ def TensileCreateLibrary():
                        if os.path.splitext(f)[1]==".yaml" \
                        and (any(logicArch in os.path.splitext(f)[0] for logicArch in logicArchs) \
                        or "hip" in os.path.splitext(f)[0]) ]
-
+  if globalParameters["ClearLogicFileList"] == "ON":
+    logicFiles = [ "/home/lamikr/own/rocm/src/sdk/ubuntu2404/rocm_sdk_builder_612/src_projects/hipBLASLt/library/src/amd_detail/rocblaslt/src/Tensile/Logic/asm_full/aldebaran/110CU/Equality/aldebaran_Cijk_Ailk_Bljk_HSS_BH_Bias_AH_SAV.yaml" ]
   print1("# LibraryLogicFiles:" % logicFiles)
   for logicFile in logicFiles:
     print1("#   %s" % logicFile)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant