From ac72da7c7f8ef893d64c6ed9c8987509a3a347ca Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 12 Feb 2026 18:11:37 -0600 Subject: [PATCH 1/9] Add mxdatagenerator --- projects/hipblaslt/tensilelite/CMakeLists.txt | 1 + .../tensilelite/client/CMakeLists.txt | 29 +++++-- .../client/include/DataInitialization.hpp | 9 ++ .../tensilelite/client/src/CMakeLists.txt | 11 +++ .../client/src/DataInitialization.cpp | 85 +++++++++++++++++++ 5 files changed, 129 insertions(+), 6 deletions(-) diff --git a/projects/hipblaslt/tensilelite/CMakeLists.txt b/projects/hipblaslt/tensilelite/CMakeLists.txt index fb1dc0586d3..9027e9a7182 100644 --- a/projects/hipblaslt/tensilelite/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/CMakeLists.txt @@ -56,6 +56,7 @@ if(TENSILELITE_ENABLE_HOST) add_subdirectory(include) if(TENSILELITE_ENABLE_CLIENT) + option(TENSILELITE_ENABLE_ROCROLLER "Enable MX data generator for FP4 testing" ON) add_subdirectory(client) endif() diff --git a/projects/hipblaslt/tensilelite/client/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/CMakeLists.txt index 2ec1552ab03..78f271151ca 100644 --- a/projects/hipblaslt/tensilelite/client/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/CMakeLists.txt @@ -13,6 +13,21 @@ target_link_libraries(tensilelite-client Boost::filesystem OpenMP::OpenMP_CXX ) + +if(TENSILELITE_ENABLE_ROCROLLER) + if(NOT ROCM_LIBS_SUPERBUILD) + if(HIPBLASLT_ENABLE_THEROCK) + find_package(mxDataGenerator REQUIRED) + else() + add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../shared/mxdatagenerator" + "${CMAKE_CURRENT_BINARY_DIR}/mxdatagenerator") + endif() + endif() + target_compile_definitions(tensilelite-client PRIVATE HIPBLASLT_USE_ROCROLLER) + target_link_libraries(tensilelite-client PRIVATE roc::mxDataGenerator) + target_compile_features(tensilelite-client PRIVATE cxx_std_20) +endif() + if(NOT WIN32) find_package(rocm_smi REQUIRED) else() @@ -24,12 +39,14 @@ endif() target_include_directories(tensilelite-client PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_definitions(tensilelite-client PRIVATE TENSILE_DEFAULT_SERIALIZATION) -set_target_properties(tensilelite-client - PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF -) +if(NOT TENSILELITE_ENABLE_ROCROLLER) + set_target_properties(tensilelite-client + PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + ) +endif() if(HIPBLASLT_ENABLE_ASAN) hipblaslt_target_configure_sanitizers(tensilelite-client PRIVATE) diff --git a/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp b/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp index c5509e21fb2..5c6aab5a98c 100644 --- a/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp +++ b/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp @@ -39,6 +39,10 @@ #include "RunListener.hpp" +#ifdef HIPBLASLT_USE_ROCROLLER +#include +#endif + namespace po = boost::program_options; namespace TensileLite @@ -944,6 +948,11 @@ namespace TensileLite void initializeConstantInputs(ContractionProblemGemm const& problem); +#ifdef HIPBLASLT_USE_ROCROLLER + // Initialize FP4 matrices with proper MX data distribution + void initializeMXDataForFP4(ContractionProblemGemm const& problem); +#endif + void copyInputs(std::vector& ptrs, std::vector& batchPtrs, std::vector& maxElements, diff --git a/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt index b150b86ffa5..775b6df5cc6 100644 --- a/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt @@ -22,3 +22,14 @@ target_sources(tensilelite-client "${CMAKE_CURRENT_SOURCE_DIR}/SolutionIterator.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/LibraryUpdateReporter.cpp" ) + +if(TENSILELITE_ENABLE_ROCROLLER) + target_sources(tensilelite-client + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/src/mxDataGen.cpp" + ) + target_include_directories(tensilelite-client + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/include" + ) +endif() diff --git a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp index 01ea4aa9cfd..d82c9c1d837 100644 --- a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp +++ b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp @@ -1692,6 +1692,16 @@ namespace TensileLite void DataInitialization::initializeCPUInputs(ContractionProblemGemm const& problem) { +#ifdef HIPBLASLT_USE_ROCROLLER + // Use MX data generator for FP4 matrices with MX format + if((problem.a().dataType() == rocisa::DataType::Float4 && problem.mxBlockA() > 0) + || (problem.b().dataType() == rocisa::DataType::Float4 && problem.mxBlockB() > 0)) + { + initializeMXDataForFP4(problem); + return; // Skip standard initialization for MX FP4 matrices + } +#endif + auto& tensors = problem.tensors(); for(size_t i = 0; i < m_vdata.size(); i++) { @@ -1756,6 +1766,81 @@ namespace TensileLite } } +#ifdef HIPBLASLT_USE_ROCROLLER + void DataInitialization::initializeMXDataForFP4(ContractionProblemGemm const& problem) + { + // Empty vectors for preSwizzle and preTile (not used in TensilteLite) + std::vector emptySwizzle; + std::vector emptyTile; + + // Generate MX data for matrix A if it uses FP4 with MX blocks + if(problem.mxBlockA() > 0 && problem.a().dataType() == rocisa::DataType::Float4) + { + // Get matrix A dimensions from tensor descriptor + auto const& tensorA = problem.a(); + auto rows = tensorA.sizes()[0]; // Number of rows + auto cols = tensorA.sizes()[1]; // Number of columns + auto stride = tensorA.strides()[1]; // Leading dimension + + // Get the pristine data pointer for matrix A + auto& pristineA + = m_vdata[ContractionProblemGemm::TENSOR::A].pristine[rocisa::DataType::Float4]; + auto& pristineMXScaleA + = m_vdata[ContractionProblemGemm::TENSOR::MXSA].pristine[rocisa::DataType::UInt8]; + + // Call mxDataGenerator directly + auto refA = generateMXInput(HIP_R_4F_E2M1_EXT, + pristineA.cpuInput.valid.get(), + pristineMXScaleA.cpuInput.valid.get(), + rows, + cols, + stride, + problem.transA(), + emptySwizzle, + emptyTile, + problem.mxBlockA(), // scaleBlockRowSize + 1, // scaleBlockColSize + true, // isMatrixA + "Bounded", + -1.0f, + 1.0f); + } + + // Generate MX data for matrix B if it uses FP4 with MX blocks + if(problem.mxBlockB() > 0 && problem.b().dataType() == rocisa::DataType::Float4) + { + // Get matrix B dimensions from tensor descriptor + auto const& tensorB = problem.b(); + auto rows = tensorB.sizes()[0]; // Number of rows + auto cols = tensorB.sizes()[1]; // Number of columns + auto stride = tensorB.strides()[1]; // Leading dimension + + // Get the pristine data pointer for matrix B + auto& pristineB + = m_vdata[ContractionProblemGemm::TENSOR::B].pristine[rocisa::DataType::Float4]; + auto& pristineMXScaleB + = m_vdata[ContractionProblemGemm::TENSOR::MXSB].pristine[rocisa::DataType::UInt8]; + + // Call mxDataGenerator directly + auto refB = generateMXInput(HIP_R_4F_E2M1_EXT, + pristineB.cpuInput.valid.get(), + pristineMXScaleB.cpuInput.valid.get(), + rows, + cols, + stride, + problem.transB(), + emptySwizzle, + emptyTile, + problem.mxBlockB(), + 1, + false, // isMatrixA = false for matrix B + "Bounded", + -1.0f, + 1.0f); + } + } +#endif + void DataInitialization::initializeConstantInputs(ContractionProblemGemm const& problem) { // Update constants if needed From afd351a8d97511577d2ac979af4d2838f6ddfd09 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 12 Feb 2026 18:17:35 -0600 Subject: [PATCH 2/9] Fix mxDataGenerator subdirectory conflict --- projects/hipblaslt/tensilelite/client/CMakeLists.txt | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/projects/hipblaslt/tensilelite/client/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/CMakeLists.txt index 78f271151ca..8dda178ba0a 100644 --- a/projects/hipblaslt/tensilelite/client/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/CMakeLists.txt @@ -15,14 +15,8 @@ target_link_libraries(tensilelite-client ) if(TENSILELITE_ENABLE_ROCROLLER) - if(NOT ROCM_LIBS_SUPERBUILD) - if(HIPBLASLT_ENABLE_THEROCK) - find_package(mxDataGenerator REQUIRED) - else() - add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../shared/mxdatagenerator" - "${CMAKE_CURRENT_BINARY_DIR}/mxdatagenerator") - endif() - endif() + # Note: mxDataGenerator is added by clients/CMakeLists.txt, so we just link to it here + # If building standalone without clients, user must manually add mx DataGenerator target_compile_definitions(tensilelite-client PRIVATE HIPBLASLT_USE_ROCROLLER) target_link_libraries(tensilelite-client PRIVATE roc::mxDataGenerator) target_compile_features(tensilelite-client PRIVATE cxx_std_20) From d6e9e1686a50b28d82c0e488a42191a7e8b3ed8c Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Fri, 13 Feb 2026 03:37:39 -0600 Subject: [PATCH 3/9] Numerically passing mxfp4 --- projects/hipblaslt/tensilelite/CMakeLists.txt | 13 +++ .../client/src/DataInitialization.cpp | 91 +++++++++++++++++-- 2 files changed, 97 insertions(+), 7 deletions(-) diff --git a/projects/hipblaslt/tensilelite/CMakeLists.txt b/projects/hipblaslt/tensilelite/CMakeLists.txt index 9027e9a7182..12f27073451 100644 --- a/projects/hipblaslt/tensilelite/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/CMakeLists.txt @@ -57,6 +57,19 @@ if(TENSILELITE_ENABLE_HOST) if(TENSILELITE_ENABLE_CLIENT) option(TENSILELITE_ENABLE_ROCROLLER "Enable MX data generator for FP4 testing" ON) + + # Add mxDataGenerator if ROCROLLER is enabled and it hasn't been added by clients build + if(TENSILELITE_ENABLE_ROCROLLER AND NOT TARGET roc::mxDataGenerator) + if(NOT ROCM_LIBS_SUPERBUILD) + if(HIPBLASLT_ENABLE_THEROCK) + find_package(mxDataGenerator REQUIRED) + else() + add_subdirectory("${CMAKE_CURRENT_SOURCE_DIR}/../../../shared/mxdatagenerator" + "${CMAKE_CURRENT_BINARY_DIR}/mxdatagenerator") + endif() + endif() + endif() + add_subdirectory(client) endif() diff --git a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp index d82c9c1d837..9a82a55589c 100644 --- a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp +++ b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp @@ -1213,6 +1213,8 @@ namespace TensileLite std::cout << "Tensor name " << m_vdata[i].name << " init mode " << ToString(m_vdata[i].init) << std::endl; } + fprintf(stderr, "[DEBUG-CONSTRUCTOR] Finished printing tensor init modes\n"); + fflush(stderr); // Init contants for(size_t i = 0; i < m_cdata.size(); i++) @@ -1269,6 +1271,27 @@ namespace TensileLite m_problemDependentData |= (m_sparse | (args["bias-type-args"].as>().size() > 1)); + +#ifdef HIPBLASLT_USE_ROCROLLER + // Force problem-dependent initialization for MX FP4 to enable mxDataGenerator + if(args.count("mx-block-a") && args["mx-block-a"].as() > 0) + { + fprintf(stderr, "[DEBUG-CONSTRUCTOR] Detected mx-block-a=%d, forcing problem-dependent init\n", + args["mx-block-a"].as()); + fflush(stderr); + m_problemDependentData = true; + } + if(args.count("mx-block-b") && args["mx-block-b"].as() > 0) + { + fprintf(stderr, "[DEBUG-CONSTRUCTOR] Detected mx-block-b=%d, forcing problem-dependent init\n", + args["mx-block-b"].as()); + fflush(stderr); + m_problemDependentData = true; + } +#endif + + fprintf(stderr, "[DEBUG-CONSTRUCTOR] m_problemDependentData = %d (after MX check)\n", m_problemDependentData); + fflush(stderr); allocNewCPUInputs(); allocNewGPUInputs(); @@ -1692,13 +1715,29 @@ namespace TensileLite void DataInitialization::initializeCPUInputs(ContractionProblemGemm const& problem) { + fprintf(stderr, "[DEBUG] *** initializeCPUInputs() CALLED ***\n"); + fflush(stderr); #ifdef HIPBLASLT_USE_ROCROLLER + fprintf(stderr, "[DEBUG] HIPBLASLT_USE_ROCROLLER is defined\n"); + fflush(stderr); + std::cout << "[DEBUG] Checking MX FP4 conditions: A.dataType=" + << (int)problem.a().dataType() << " (Float4=" << (int)rocisa::DataType::Float4 + << "), mxBlockA=" << problem.mxBlockA() + << ", B.dataType=" << (int)problem.b().dataType() + << ", mxBlockB=" << problem.mxBlockB() << std::endl; + // Use MX data generator for FP4 matrices with MX format - if((problem.a().dataType() == rocisa::DataType::Float4 && problem.mxBlockA() > 0) - || (problem.b().dataType() == rocisa::DataType::Float4 && problem.mxBlockB() > 0)) + bool useMXGenerator = (problem.a().dataType() == rocisa::DataType::Float4 && problem.mxBlockA() > 0) + || (problem.b().dataType() == rocisa::DataType::Float4 && problem.mxBlockB() > 0); + if(useMXGenerator) { + std::cout << "[DEBUG] *** MX FP4 condition TRUE - calling initializeMXDataForFP4() ***" << std::endl; initializeMXDataForFP4(problem); - return; // Skip standard initialization for MX FP4 matrices + std::cout << "[DEBUG] *** MX generator completed - will skip A/B init but continue with other tensors ***" << std::endl; + } + else + { + std::cout << "[DEBUG] MX FP4 condition FALSE - using standard initialization" << std::endl; } #endif @@ -1709,6 +1748,15 @@ namespace TensileLite or i == ContractionProblemGemm::TENSOR::METADATA) continue; +#ifdef HIPBLASLT_USE_ROCROLLER + // Skip A and B if MX generator was used + if(useMXGenerator && (i == ContractionProblemGemm::TENSOR::A || i == ContractionProblemGemm::TENSOR::B)) + { + std::cout << "[DEBUG] Skipping standard init for tensor " << i << " (using MX generator data)" << std::endl; + continue; + } +#endif + if(m_problemDependentData) { // Should this m_cEqualsD set in ContractionProblem or boost args? @@ -1769,6 +1817,8 @@ namespace TensileLite #ifdef HIPBLASLT_USE_ROCROLLER void DataInitialization::initializeMXDataForFP4(ContractionProblemGemm const& problem) { + std::cout << "[DEBUG] === INSIDE initializeMXDataForFP4() ===" << std::endl; + // Empty vectors for preSwizzle and preTile (not used in TensilteLite) std::vector emptySwizzle; std::vector emptyTile; @@ -1776,20 +1826,35 @@ namespace TensileLite // Generate MX data for matrix A if it uses FP4 with MX blocks if(problem.mxBlockA() > 0 && problem.a().dataType() == rocisa::DataType::Float4) { + std::cout << "[DEBUG] Generating MX data for matrix A (mxBlockA=" << problem.mxBlockA() << ")" << std::endl; + // Get matrix A dimensions from tensor descriptor auto const& tensorA = problem.a(); auto rows = tensorA.sizes()[0]; // Number of rows auto cols = tensorA.sizes()[1]; // Number of columns auto stride = tensorA.strides()[1]; // Leading dimension + std::cout << "[DEBUG] Matrix A dimensions: rows=" << rows << ", cols=" << cols << ", stride=" << stride << std::endl; + // Get the pristine data pointer for matrix A auto& pristineA = m_vdata[ContractionProblemGemm::TENSOR::A].pristine[rocisa::DataType::Float4]; auto& pristineMXScaleA - = m_vdata[ContractionProblemGemm::TENSOR::MXSA].pristine[rocisa::DataType::UInt8]; + = m_vdata[ContractionProblemGemm::TENSOR::MXSA].pristine[problem.mxsa().dataType()]; + + std::cout << "[DEBUG] Buffer sizes - A: maxElements=" << pristineA.maxElements + << ", A.cpuInput.valid=" << (void*)pristineA.cpuInput.valid.get() + << ", MXScaleA: maxElements=" << pristineMXScaleA.maxElements + << ", MXScaleA.cpuInput.valid=" << (void*)pristineMXScaleA.cpuInput.valid.get() + << std::endl; + std::cout << "[DEBUG] Expected tensor size: rows*cols=" << (rows*cols) + << ", FP4 packed size=" << ((rows*cols + 1)/2) << std::endl; + std::cout << "[DEBUG] TransA=" << problem.transA() << std::endl; + + std::cout << "[DEBUG] Calling generateMXInput() for matrix A..." << std::endl; // Call mxDataGenerator directly - auto refA = generateMXInput(HIP_R_4F_E2M1_EXT, + auto refA = generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, pristineA.cpuInput.valid.get(), pristineMXScaleA.cpuInput.valid.get(), rows, @@ -1804,25 +1869,33 @@ namespace TensileLite "Bounded", -1.0f, 1.0f); + + std::cout << "[DEBUG] generateMXInput() for matrix A completed, refA.size()=" << refA.size() << std::endl; } // Generate MX data for matrix B if it uses FP4 with MX blocks if(problem.mxBlockB() > 0 && problem.b().dataType() == rocisa::DataType::Float4) { + std::cout << "[DEBUG] Generating MX data for matrix B (mxBlockB=" << problem.mxBlockB() << ")" << std::endl; + // Get matrix B dimensions from tensor descriptor auto const& tensorB = problem.b(); auto rows = tensorB.sizes()[0]; // Number of rows auto cols = tensorB.sizes()[1]; // Number of columns auto stride = tensorB.strides()[1]; // Leading dimension + std::cout << "[DEBUG] Matrix B dimensions: rows=" << rows << ", cols=" << cols << ", stride=" << stride << std::endl; + // Get the pristine data pointer for matrix B auto& pristineB = m_vdata[ContractionProblemGemm::TENSOR::B].pristine[rocisa::DataType::Float4]; auto& pristineMXScaleB - = m_vdata[ContractionProblemGemm::TENSOR::MXSB].pristine[rocisa::DataType::UInt8]; + = m_vdata[ContractionProblemGemm::TENSOR::MXSB].pristine[problem.mxsb().dataType()]; + + std::cout << "[DEBUG] Calling generateMXInput() for matrix B..." << std::endl; // Call mxDataGenerator directly - auto refB = generateMXInput(HIP_R_4F_E2M1_EXT, + auto refB = generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, pristineB.cpuInput.valid.get(), pristineMXScaleB.cpuInput.valid.get(), rows, @@ -1837,7 +1910,11 @@ namespace TensileLite "Bounded", -1.0f, 1.0f); + + std::cout << "[DEBUG] generateMXInput() for matrix B completed, refB.size()=" << refB.size() << std::endl; } + + std::cout << "[DEBUG] === Exiting initializeMXDataForFP4() ===" << std::endl; } #endif From e53da42896787953afa704d081d8228015c07065 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 02:37:33 -0600 Subject: [PATCH 4/9] Remove reliance on HIPBLASLT_USE_ROCROLLER --- projects/hipblaslt/tensilelite/CMakeLists.txt | 6 +- .../tensilelite/client/CMakeLists.txt | 18 +- .../client/include/DataInitialization.hpp | 5 - .../tensilelite/client/src/CMakeLists.txt | 18 +- .../client/src/DataInitialization.cpp | 156 ++++-------------- .../tensilelite/tests/CMakeLists.txt | 14 ++ 6 files changed, 62 insertions(+), 155 deletions(-) diff --git a/projects/hipblaslt/tensilelite/CMakeLists.txt b/projects/hipblaslt/tensilelite/CMakeLists.txt index 12f27073451..58c11ae6320 100644 --- a/projects/hipblaslt/tensilelite/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/CMakeLists.txt @@ -56,10 +56,8 @@ if(TENSILELITE_ENABLE_HOST) add_subdirectory(include) if(TENSILELITE_ENABLE_CLIENT) - option(TENSILELITE_ENABLE_ROCROLLER "Enable MX data generator for FP4 testing" ON) - - # Add mxDataGenerator if ROCROLLER is enabled and it hasn't been added by clients build - if(TENSILELITE_ENABLE_ROCROLLER AND NOT TARGET roc::mxDataGenerator) + # Add mxDataGenerator if it hasn't been added by clients build + if(NOT TARGET roc::mxDataGenerator) if(NOT ROCM_LIBS_SUPERBUILD) if(HIPBLASLT_ENABLE_THEROCK) find_package(mxDataGenerator REQUIRED) diff --git a/projects/hipblaslt/tensilelite/client/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/CMakeLists.txt index 8dda178ba0a..2e8f738c456 100644 --- a/projects/hipblaslt/tensilelite/client/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/CMakeLists.txt @@ -14,13 +14,8 @@ target_link_libraries(tensilelite-client OpenMP::OpenMP_CXX ) -if(TENSILELITE_ENABLE_ROCROLLER) - # Note: mxDataGenerator is added by clients/CMakeLists.txt, so we just link to it here - # If building standalone without clients, user must manually add mx DataGenerator - target_compile_definitions(tensilelite-client PRIVATE HIPBLASLT_USE_ROCROLLER) - target_link_libraries(tensilelite-client PRIVATE roc::mxDataGenerator) - target_compile_features(tensilelite-client PRIVATE cxx_std_20) -endif() +target_link_libraries(tensilelite-client PRIVATE roc::mxDataGenerator) +target_compile_features(tensilelite-client PRIVATE cxx_std_20) if(NOT WIN32) find_package(rocm_smi REQUIRED) @@ -33,15 +28,6 @@ endif() target_include_directories(tensilelite-client PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include") target_compile_definitions(tensilelite-client PRIVATE TENSILE_DEFAULT_SERIALIZATION) -if(NOT TENSILELITE_ENABLE_ROCROLLER) - set_target_properties(tensilelite-client - PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CXX_EXTENSIONS OFF - ) -endif() - if(HIPBLASLT_ENABLE_ASAN) hipblaslt_target_configure_sanitizers(tensilelite-client PRIVATE) endif() diff --git a/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp b/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp index 5c6aab5a98c..70e054a716c 100644 --- a/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp +++ b/projects/hipblaslt/tensilelite/client/include/DataInitialization.hpp @@ -39,9 +39,7 @@ #include "RunListener.hpp" -#ifdef HIPBLASLT_USE_ROCROLLER #include -#endif namespace po = boost::program_options; @@ -948,10 +946,7 @@ namespace TensileLite void initializeConstantInputs(ContractionProblemGemm const& problem); -#ifdef HIPBLASLT_USE_ROCROLLER - // Initialize FP4 matrices with proper MX data distribution void initializeMXDataForFP4(ContractionProblemGemm const& problem); -#endif void copyInputs(std::vector& ptrs, std::vector& batchPtrs, diff --git a/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt index 775b6df5cc6..55bd28264bb 100644 --- a/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/src/CMakeLists.txt @@ -23,13 +23,11 @@ target_sources(tensilelite-client "${CMAKE_CURRENT_SOURCE_DIR}/LibraryUpdateReporter.cpp" ) -if(TENSILELITE_ENABLE_ROCROLLER) - target_sources(tensilelite-client - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/src/mxDataGen.cpp" - ) - target_include_directories(tensilelite-client - PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/include" - ) -endif() +target_sources(tensilelite-client + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/src/mxDataGen.cpp" +) +target_include_directories(tensilelite-client + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/include" +) diff --git a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp index 9a82a55589c..804f626476d 100644 --- a/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp +++ b/projects/hipblaslt/tensilelite/client/src/DataInitialization.cpp @@ -1213,9 +1213,6 @@ namespace TensileLite std::cout << "Tensor name " << m_vdata[i].name << " init mode " << ToString(m_vdata[i].init) << std::endl; } - fprintf(stderr, "[DEBUG-CONSTRUCTOR] Finished printing tensor init modes\n"); - fflush(stderr); - // Init contants for(size_t i = 0; i < m_cdata.size(); i++) { @@ -1272,26 +1269,12 @@ namespace TensileLite |= (m_sparse | (args["bias-type-args"].as>().size() > 1)); -#ifdef HIPBLASLT_USE_ROCROLLER // Force problem-dependent initialization for MX FP4 to enable mxDataGenerator if(args.count("mx-block-a") && args["mx-block-a"].as() > 0) - { - fprintf(stderr, "[DEBUG-CONSTRUCTOR] Detected mx-block-a=%d, forcing problem-dependent init\n", - args["mx-block-a"].as()); - fflush(stderr); m_problemDependentData = true; - } if(args.count("mx-block-b") && args["mx-block-b"].as() > 0) - { - fprintf(stderr, "[DEBUG-CONSTRUCTOR] Detected mx-block-b=%d, forcing problem-dependent init\n", - args["mx-block-b"].as()); - fflush(stderr); m_problemDependentData = true; - } -#endif - fprintf(stderr, "[DEBUG-CONSTRUCTOR] m_problemDependentData = %d (after MX check)\n", m_problemDependentData); - fflush(stderr); allocNewCPUInputs(); allocNewGPUInputs(); @@ -1715,31 +1698,10 @@ namespace TensileLite void DataInitialization::initializeCPUInputs(ContractionProblemGemm const& problem) { - fprintf(stderr, "[DEBUG] *** initializeCPUInputs() CALLED ***\n"); - fflush(stderr); -#ifdef HIPBLASLT_USE_ROCROLLER - fprintf(stderr, "[DEBUG] HIPBLASLT_USE_ROCROLLER is defined\n"); - fflush(stderr); - std::cout << "[DEBUG] Checking MX FP4 conditions: A.dataType=" - << (int)problem.a().dataType() << " (Float4=" << (int)rocisa::DataType::Float4 - << "), mxBlockA=" << problem.mxBlockA() - << ", B.dataType=" << (int)problem.b().dataType() - << ", mxBlockB=" << problem.mxBlockB() << std::endl; - - // Use MX data generator for FP4 matrices with MX format bool useMXGenerator = (problem.a().dataType() == rocisa::DataType::Float4 && problem.mxBlockA() > 0) || (problem.b().dataType() == rocisa::DataType::Float4 && problem.mxBlockB() > 0); if(useMXGenerator) - { - std::cout << "[DEBUG] *** MX FP4 condition TRUE - calling initializeMXDataForFP4() ***" << std::endl; initializeMXDataForFP4(problem); - std::cout << "[DEBUG] *** MX generator completed - will skip A/B init but continue with other tensors ***" << std::endl; - } - else - { - std::cout << "[DEBUG] MX FP4 condition FALSE - using standard initialization" << std::endl; - } -#endif auto& tensors = problem.tensors(); for(size_t i = 0; i < m_vdata.size(); i++) @@ -1748,14 +1710,8 @@ namespace TensileLite or i == ContractionProblemGemm::TENSOR::METADATA) continue; -#ifdef HIPBLASLT_USE_ROCROLLER - // Skip A and B if MX generator was used if(useMXGenerator && (i == ContractionProblemGemm::TENSOR::A || i == ContractionProblemGemm::TENSOR::B)) - { - std::cout << "[DEBUG] Skipping standard init for tensor " << i << " (using MX generator data)" << std::endl; continue; - } -#endif if(m_problemDependentData) { @@ -1814,109 +1770,69 @@ namespace TensileLite } } -#ifdef HIPBLASLT_USE_ROCROLLER void DataInitialization::initializeMXDataForFP4(ContractionProblemGemm const& problem) { - std::cout << "[DEBUG] === INSIDE initializeMXDataForFP4() ===" << std::endl; - - // Empty vectors for preSwizzle and preTile (not used in TensilteLite) std::vector emptySwizzle; std::vector emptyTile; - // Generate MX data for matrix A if it uses FP4 with MX blocks if(problem.mxBlockA() > 0 && problem.a().dataType() == rocisa::DataType::Float4) { - std::cout << "[DEBUG] Generating MX data for matrix A (mxBlockA=" << problem.mxBlockA() << ")" << std::endl; - - // Get matrix A dimensions from tensor descriptor auto const& tensorA = problem.a(); - auto rows = tensorA.sizes()[0]; // Number of rows - auto cols = tensorA.sizes()[1]; // Number of columns - auto stride = tensorA.strides()[1]; // Leading dimension + auto rows = tensorA.sizes()[0]; + auto cols = tensorA.sizes()[1]; + auto stride = tensorA.strides()[1]; - std::cout << "[DEBUG] Matrix A dimensions: rows=" << rows << ", cols=" << cols << ", stride=" << stride << std::endl; - - // Get the pristine data pointer for matrix A auto& pristineA = m_vdata[ContractionProblemGemm::TENSOR::A].pristine[rocisa::DataType::Float4]; auto& pristineMXScaleA = m_vdata[ContractionProblemGemm::TENSOR::MXSA].pristine[problem.mxsa().dataType()]; - std::cout << "[DEBUG] Buffer sizes - A: maxElements=" << pristineA.maxElements - << ", A.cpuInput.valid=" << (void*)pristineA.cpuInput.valid.get() - << ", MXScaleA: maxElements=" << pristineMXScaleA.maxElements - << ", MXScaleA.cpuInput.valid=" << (void*)pristineMXScaleA.cpuInput.valid.get() - << std::endl; - std::cout << "[DEBUG] Expected tensor size: rows*cols=" << (rows*cols) - << ", FP4 packed size=" << ((rows*cols + 1)/2) << std::endl; - std::cout << "[DEBUG] TransA=" << problem.transA() << std::endl; - - std::cout << "[DEBUG] Calling generateMXInput() for matrix A..." << std::endl; - - // Call mxDataGenerator directly - auto refA = generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, - pristineA.cpuInput.valid.get(), - pristineMXScaleA.cpuInput.valid.get(), - rows, - cols, - stride, - problem.transA(), - emptySwizzle, - emptyTile, - problem.mxBlockA(), // scaleBlockRowSize - 1, // scaleBlockColSize - true, // isMatrixA - "Bounded", - -1.0f, - 1.0f); - - std::cout << "[DEBUG] generateMXInput() for matrix A completed, refA.size()=" << refA.size() << std::endl; + generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, + pristineA.cpuInput.valid.get(), + pristineMXScaleA.cpuInput.valid.get(), + rows, + cols, + stride, + problem.transA(), + emptySwizzle, + emptyTile, + problem.mxBlockA(), + 1, + true, + "Bounded", + -1.0f, + 1.0f); } - // Generate MX data for matrix B if it uses FP4 with MX blocks if(problem.mxBlockB() > 0 && problem.b().dataType() == rocisa::DataType::Float4) { - std::cout << "[DEBUG] Generating MX data for matrix B (mxBlockB=" << problem.mxBlockB() << ")" << std::endl; - - // Get matrix B dimensions from tensor descriptor auto const& tensorB = problem.b(); - auto rows = tensorB.sizes()[0]; // Number of rows - auto cols = tensorB.sizes()[1]; // Number of columns - auto stride = tensorB.strides()[1]; // Leading dimension + auto rows = tensorB.sizes()[0]; + auto cols = tensorB.sizes()[1]; + auto stride = tensorB.strides()[1]; - std::cout << "[DEBUG] Matrix B dimensions: rows=" << rows << ", cols=" << cols << ", stride=" << stride << std::endl; - - // Get the pristine data pointer for matrix B auto& pristineB = m_vdata[ContractionProblemGemm::TENSOR::B].pristine[rocisa::DataType::Float4]; auto& pristineMXScaleB = m_vdata[ContractionProblemGemm::TENSOR::MXSB].pristine[problem.mxsb().dataType()]; - std::cout << "[DEBUG] Calling generateMXInput() for matrix B..." << std::endl; - - // Call mxDataGenerator directly - auto refB = generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, - pristineB.cpuInput.valid.get(), - pristineMXScaleB.cpuInput.valid.get(), - rows, - cols, - stride, - problem.transB(), - emptySwizzle, - emptyTile, - problem.mxBlockB(), - 1, - false, // isMatrixA = false for matrix B - "Bounded", - -1.0f, - 1.0f); - - std::cout << "[DEBUG] generateMXInput() for matrix B completed, refB.size()=" << refB.size() << std::endl; + generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, + pristineB.cpuInput.valid.get(), + pristineMXScaleB.cpuInput.valid.get(), + rows, + cols, + stride, + problem.transB(), + emptySwizzle, + emptyTile, + problem.mxBlockB(), + 1, + false, + "Bounded", + -1.0f, + 1.0f); } - - std::cout << "[DEBUG] === Exiting initializeMXDataForFP4() ===" << std::endl; } -#endif void DataInitialization::initializeConstantInputs(ContractionProblemGemm const& problem) { diff --git a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt index b31b728e75c..86fb97e7754 100644 --- a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt @@ -50,6 +50,20 @@ target_link_libraries(tensilelite-tests ${CMAKE_DL_LIBS} ) +if(TARGET roc::mxDataGenerator) + target_sources(tensilelite-tests + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/MXDataGen_test.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/src/mxDataGen.cpp" + ) + target_include_directories(tensilelite-tests + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/include" + ) + target_link_libraries(tensilelite-tests PRIVATE roc::mxDataGenerator) + target_compile_features(tensilelite-tests PRIVATE cxx_std_20) +endif() + gtest_discover_tests(tensilelite-tests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} TIMEOUT 60) target_link_libraries(tensilelite-tests PUBLIC GTest::gtest) From a6006d50551b03a22731bc7fc67499a2fdb0a6af Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 04:50:13 -0600 Subject: [PATCH 5/9] Fix path --- projects/hipblaslt/tensilelite/tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt index 86fb97e7754..1d119ec69b4 100644 --- a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt @@ -54,11 +54,11 @@ if(TARGET roc::mxDataGenerator) target_sources(tensilelite-tests PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/MXDataGen_test.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/src/mxDataGen.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/../../clients/common/src/mxDataGen.cpp" ) target_include_directories(tensilelite-tests PRIVATE - "${CMAKE_CURRENT_SOURCE_DIR}/../../../clients/common/include" + "${CMAKE_CURRENT_SOURCE_DIR}/../../clients/common/include" ) target_link_libraries(tensilelite-tests PRIVATE roc::mxDataGenerator) target_compile_features(tensilelite-tests PRIVATE cxx_std_20) From a07f9ea970d198c656d99bee6623c691b7598b9a Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 04:50:19 -0600 Subject: [PATCH 6/9] Add mxDataGenerator test --- .../tensilelite/tests/MXDataGen_test.cpp | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp diff --git a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp new file mode 100644 index 00000000000..727b0c286ba --- /dev/null +++ b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp @@ -0,0 +1,127 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include + +#include + +#include +#include + +/** + * @brief Unpack two FP4 nibbles from a packed byte. + * + * FP4 E2M1 values are packed two-per-byte (low nibble first). + * Zero is represented by nibble value 0x0 (+0) or 0x8 (-0). + */ +static bool isZeroNibble(uint8_t nibble) +{ + // FP4 E2M1: 0x0 = +0.0, 0x8 = -0.0 + return (nibble == 0x0) || (nibble == 0x8); +} + +/** + * @brief Count elements that decode to zero in a packed FP4 buffer. + */ +static size_t countZerosFP4(const uint8_t* packedData, size_t numPackedBytes) +{ + size_t zeros = 0; + for(size_t i = 0; i < numPackedBytes; ++i) + { + uint8_t lo = packedData[i] & 0x0F; + uint8_t hi = (packedData[i] >> 4) & 0x0F; + if(isZeroNibble(lo)) + ++zeros; + if(isZeroNibble(hi)) + ++zeros; + } + return zeros; +} + +class MXDataGenFP4Test : public ::testing::TestWithParam> +{ +}; + +/** + * @brief Verify that generateMXInput produces FP4 data with an acceptable zero frequency. + * + * Due to FP4 E2M1 nibble-pair packing statistics, the expected zero frequency is + * approximately 12-13% (theoretical: 2/15 - 1/225 ≈ 12.89%). + */ +TEST_P(MXDataGenFP4Test, ZeroFrequencyWithinBounds) +{ + auto [rows, cols, mxBlock, isTranspose] = GetParam(); + + const uint64_t numElements = rows * cols; + const uint64_t numPacked = (numElements + 1) / 2; + const size_t numScales = ((rows + mxBlock - 1) / mxBlock) * cols; + + std::vector dataBuffer(numPacked, 0); + std::vector scaleBuffer(numScales, 0); + + std::vector emptySwizzle; + std::vector emptyTile; + + generateMXInput((hipDataType)HIP_R_4F_E2M1_EXT, + dataBuffer.data(), + scaleBuffer.data(), + rows, + cols, + rows, // stride = rows (column-major) + isTranspose, + emptySwizzle, + emptyTile, + mxBlock, + 1, + true, + "Bounded", + -1.0f, + 1.0f); + + size_t zeros = countZerosFP4(dataBuffer.data(), numPacked); + double zeroPercent = 100.0 * static_cast(zeros) / static_cast(numElements); + + // Expected: ~12.89% zeros due to FP4 nibble-pair statistics (2/15 - 1/225). + EXPECT_LT(zeroPercent, 13.0) + << "Zero frequency " << zeroPercent << "% exceeds 13% upper bound for " + << rows << "x" << cols << " FP4 matrix (transpose=" << isTranspose << ")"; + + // Ensure non-trivial data was actually generated (not all zeros) + EXPECT_GT(numElements - zeros, 0u) + << "All elements are zero for " << rows << "x" << cols << " FP4 matrix"; +} + +INSTANTIATE_TEST_SUITE_P( + FP4ZeroFrequency, + MXDataGenFP4Test, + ::testing::Values( + // rows, cols, mxBlock, isTranspose + std::make_tuple(128u, 128u, 32, true), + std::make_tuple(256u, 256u, 32, true), + std::make_tuple(2048u, 1026u, 32, true), + std::make_tuple(2048u, 514u, 32, false) + ) +); From 5e6ef4a97f1c0574cf09605c3ce74dfea9d4d3b7 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 15:32:31 -0600 Subject: [PATCH 7/9] Address reviews --- .../tensilelite/client/CMakeLists.txt | 7 ++++- .../tensilelite/tests/CMakeLists.txt | 7 ++++- .../tensilelite/tests/MXDataGen_test.cpp | 27 ++----------------- 3 files changed, 14 insertions(+), 27 deletions(-) diff --git a/projects/hipblaslt/tensilelite/client/CMakeLists.txt b/projects/hipblaslt/tensilelite/client/CMakeLists.txt index 2e8f738c456..d43cb85e2aa 100644 --- a/projects/hipblaslt/tensilelite/client/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/client/CMakeLists.txt @@ -15,7 +15,12 @@ target_link_libraries(tensilelite-client ) target_link_libraries(tensilelite-client PRIVATE roc::mxDataGenerator) -target_compile_features(tensilelite-client PRIVATE cxx_std_20) +set_target_properties(tensilelite-client + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF +) if(NOT WIN32) find_package(rocm_smi REQUIRED) diff --git a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt index 1d119ec69b4..9a3ceca2c1f 100644 --- a/projects/hipblaslt/tensilelite/tests/CMakeLists.txt +++ b/projects/hipblaslt/tensilelite/tests/CMakeLists.txt @@ -61,7 +61,12 @@ if(TARGET roc::mxDataGenerator) "${CMAKE_CURRENT_SOURCE_DIR}/../../clients/common/include" ) target_link_libraries(tensilelite-tests PRIVATE roc::mxDataGenerator) - target_compile_features(tensilelite-tests PRIVATE cxx_std_20) + set_target_properties(tensilelite-tests + PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON + CXX_EXTENSIONS OFF + ) endif() gtest_discover_tests(tensilelite-tests WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} TIMEOUT 60) diff --git a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp index 727b0c286ba..aa328bfd5e4 100644 --- a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp +++ b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp @@ -1,28 +1,5 @@ -/******************************************************************************* - * - * MIT License - * - * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - *******************************************************************************/ +// Copyright Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT #include From 94560b7c868e6289f591fc62d8779a76255bd869 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 15:56:50 -0600 Subject: [PATCH 8/9] Update baseline and range of zero frequency in test comments --- .../tensilelite/tests/MXDataGen_test.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp index aa328bfd5e4..5a6e7a84e98 100644 --- a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp +++ b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp @@ -45,8 +45,17 @@ class MXDataGenFP4Test : public ::testing::TestWithParam(zeros) / static_cast(numElements); - // Expected: ~12.89% zeros due to FP4 nibble-pair statistics (2/15 - 1/225). + // Empirically ~12.5–12.9% zeros; naive baseline is 2/16 = 12.5% (2 zero values + // out of 16 FP4 nibble values), slightly elevated by MX block scaling bias. EXPECT_LT(zeroPercent, 13.0) << "Zero frequency " << zeroPercent << "% exceeds 13% upper bound for " << rows << "x" << cols << " FP4 matrix (transpose=" << isTranspose << ")"; From 379d0ee42ee9414e0dace2290d52ae1eedce57b6 Mon Sep 17 00:00:00 2001 From: archana-ramalingam Date: Thu, 19 Feb 2026 16:18:58 -0600 Subject: [PATCH 9/9] Update comment --- .../tensilelite/tests/MXDataGen_test.cpp | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp index 5a6e7a84e98..28c037b6973 100644 --- a/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp +++ b/projects/hipblaslt/tensilelite/tests/MXDataGen_test.cpp @@ -45,17 +45,10 @@ class MXDataGenFP4Test : public ::testing::TestWithParam