diff --git a/CMakeLists.txt b/CMakeLists.txt index cbe7c0bc3..e9969ecf5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,7 +146,7 @@ get_os_id(OS_ID) message (STATUS "OS detected is ${OS_ID}") # Versioning via rocm-cmake -set ( VERSION_STRING "2.9.0" ) +set ( VERSION_STRING "2.11.0" ) rocm_setup_version( VERSION ${VERSION_STRING} ) # Append our library helper cmake path and the cmake path for hip (for convenience) @@ -196,7 +196,7 @@ if( BUILD_WITH_TENSILE ) message (STATUS "using GIT Tensile fork=${tensile_fork} from branch=${tensile_tag}") endif() list(APPEND CMAKE_PREFIX_PATH ${VIRTUALENV_HOME_DIR}) - find_package(Tensile 4.12.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "${INSTALLED_TENSILE_PATH}") + find_package(Tensile 4.13.0 EXACT REQUIRED HIP LLVM OpenMP PATHS "${INSTALLED_TENSILE_PATH}") endif() # Find HCC/HIP dependencies diff --git a/Jenkinsfile b/Jenkinsfile index 2097e87d9..7c4d845a0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -76,77 +76,35 @@ rocBLASCI: { platform, project-> - def command - - if(platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('sles')) - { - if(auxiliary.isJobStartedByTimer()) - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix}/build/release/clients/staging - LD_LIBRARY_PATH=/opt/rocm/hcc/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG sudo ./rocblas-test --gtest_output=xml --gtest_color=yes --gtest_filter=*nightly*-*known_bug* #--gtest_filter=*nightly* - """ + String sudo = auxiliary.sudo(platform.jenkinsLabel) + def gfilter = auxiliary.isJobStartedByTimer() ? 
"*nightly*" : "*quick*:*pre_checkin*" - platform.runCommand(this, command) - junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" - } - else - { - command = """#!/usr/bin/env bash + def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix}/build/release/clients/staging - LD_LIBRARY_PATH=/opt/rocm/hcc/lib ./example-sscal - LD_LIBRARY_PATH=/opt/rocm/hcc/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG sudo ./rocblas-test --gtest_output=xml --gtest_color=yes --gtest_filter=*quick*:*pre_checkin*-*known_bug* #--gtest_filter=*checkin* + ${sudo} LD_LIBRARY_PATH=/opt/rocm/hcc/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocblas-test --gtest_output=xml --gtest_color=yes --gtest_filter=${gfilter}-*known_bug* """ - platform.runCommand(this, command) - junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" - } - } - else - { - if(auxiliary.isJobStartedByTimer()) - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix}/build/release/clients/staging - LD_LIBRARY_PATH=/opt/rocm/hcc/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocblas-test --gtest_output=xml --gtest_color=yes --gtest_filter=*nightly*-*known_bug* #--gtest_filter=*nightly* - """ - - platform.runCommand(this, command) - junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" - } - else - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix}/build/release/clients/staging - LD_LIBRARY_PATH=/opt/rocm/hcc/lib ./example-sscal - LD_LIBRARY_PATH=/opt/rocm/hcc/lib GTEST_LISTENER=NO_PASS_LINE_IN_LOG ./rocblas-test --gtest_output=xml --gtest_color=yes --gtest_filter=*quick*:*pre_checkin*-*known_bug* #--gtest_filter=*checkin* - """ - - platform.runCommand(this, command) - junit "${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" - } - } + platform.runCommand(this, command) + junit 
"${project.paths.project_build_prefix}/build/release/clients/staging/*.xml" } def packageCommand = { platform, project-> + String sudo = auxiliary.sudo(platform.jenkinsLabel) def command - if(platform.jenkinsLabel.contains('centos')) + if(platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('sles')) { command = """ set -x cd ${project.paths.project_build_prefix}/build/release - make package - mkdir -p package - mv *.rpm package/ - rpm -qlp package/*.rpm + ${sudo} make package + ${sudo} mkdir -p package + ${sudo} mv *.rpm package/ + ${sudo} rpm -qlp package/*.rpm """ platform.runCommand(this, command) diff --git a/bump_develop_version.sh b/bump_develop_version.sh index 557af9c80..9a86175f7 100755 --- a/bump_develop_version.sh +++ b/bump_develop_version.sh @@ -5,12 +5,16 @@ # - run this script in master branch # - after running this script merge master into develop -OLD_ROCBLAS_VERSION="2.8.0" -NEW_ROCBLAS_VERSION="2.9.0" +OLD_ROCBLAS_VERSION="2.10.0" +NEW_ROCBLAS_VERSION="2.11.0" -OLD_TENSILE_VERSION="tensile_tag 015477ad8c8ea0ef6f59b8d49a65015b46b8a48e" -NEW_TENSILE_VERSION="tensile_tag 015477ad8c8ea0ef6f59b8d49a65015b46b8a48e" +OLD_TENSILE_VERSION="Tensile 4.12.0" +NEW_TENSILE_VERSION="Tensile 4.13.0" + +OLD_TENSILE_HASH="fe4f721886d07eef6251cea4225e027181022aa5" +NEW_TENSILE_HASH="a9379f4e42efb754c9a618047bfbf292d74dfd0f" sed -i "s/${OLD_ROCBLAS_VERSION}/${NEW_ROCBLAS_VERSION}/g" CMakeLists.txt sed -i "s/${OLD_TENSILE_VERSION}/${NEW_TENSILE_VERSION}/g" CMakeLists.txt +sed -i "s/${OLD_TENSILE_HASH}/${NEW_TENSILE_HASH}/g" tensile_tag.txt diff --git a/bump_master_version.sh b/bump_master_version.sh index 5f1029d24..1abb977c4 100755 --- a/bump_master_version.sh +++ b/bump_master_version.sh @@ -6,20 +6,24 @@ # - after running this script and merging develop into master, run bump_develop_version.sh in master and # merge master into develop -OLD_ROCBLAS_VERSION="2.7.0" -NEW_ROCBLAS_VERSION="2.8.0" +OLD_ROCBLAS_VERSION="2.9.0" 
+NEW_ROCBLAS_VERSION="2.10.0" -OLD_TENSILE_VERSION="tensile_tag 9c63a0bf1c0acdb44376ddc80b867beb3386981a" -NEW_TENSILE_VERSION="tensile_tag 015477ad8c8ea0ef6f59b8d49a65015b46b8a48e" +OLD_TENSILE_VERSION="Tensile 4.12.0" +NEW_TENSILE_VERSION="Tensile 4.13.0" + +OLD_TENSILE_HASH="3ab0890743db4cca5244d0dab30a574fa34c89b8" +NEW_TENSILE_HASH="fe4f721886d07eef6251cea4225e027181022aa5" OLD_MINIMUM_REQUIRED_VERSION="MinimumRequiredVersion: 4.6.0" NEW_MINIMUM_REQUIRED_VERSION="MinimumRequiredVersion: 4.7.1" sed -i "s/${OLD_ROCBLAS_VERSION}/${NEW_ROCBLAS_VERSION}/g" CMakeLists.txt sed -i "s/${OLD_TENSILE_VERSION}/${NEW_TENSILE_VERSION}/g" CMakeLists.txt +sed -i "s/${OLD_TENSILE_HASH}/${NEW_TENSILE_HASH}/g" tensile_tag.txt #only update yaml files for a Tensile major version change #for FILE in library/src/blas3/Tensile/Logic/*/*yaml #do # sed -i "s/${OLD_MINIMUM_REQUIRED_VERSION}/${NEW_MINIMUM_REQUIRED_VERSION}/" $FILE #done diff --git a/clients/benchmarks/client.cpp b/clients/benchmarks/client.cpp index 13c83bda2..71a263b8d 100644 --- a/clients/benchmarks/client.cpp +++ b/clients/benchmarks/client.cpp @@ -21,9 +21,17 @@ #include "testing_nrm2_batched.hpp" #include "testing_nrm2_strided_batched.hpp" #include "testing_rot.hpp" +#include "testing_rot_batched.hpp" +#include "testing_rot_strided_batched.hpp" #include "testing_rotg.hpp" +#include "testing_rotg_batched.hpp" +#include "testing_rotg_strided_batched.hpp" #include "testing_rotm.hpp" +#include "testing_rotm_batched.hpp" +#include "testing_rotm_strided_batched.hpp" #include "testing_rotmg.hpp" +#include "testing_rotmg_batched.hpp" +#include "testing_rotmg_strided_batched.hpp" #include "testing_scal.hpp" #include "testing_scal_batched.hpp" #include "testing_scal_strided_batched.hpp" @@ -174,14 +182,18 @@ struct perf_blas< testing_set_get_vector(arg); else if(!strcmp(arg.function, "set_get_matrix")) testing_set_get_matrix(arg); - else if(!strcmp(arg.function, "rot")) - 
testing_rot(arg); - else if(!strcmp(arg.function, "rotg")) - testing_rotg(arg); else if(!strcmp(arg.function, "rotm")) testing_rotm(arg); + else if(!strcmp(arg.function, "rotm_batched")) + testing_rotm_batched(arg); + else if(!strcmp(arg.function, "rotm_strided_batched")) + testing_rotm_strided_batched(arg); else if(!strcmp(arg.function, "rotmg")) testing_rotmg(arg); + else if(!strcmp(arg.function, "rotmg_batched")) + testing_rotmg_batched(arg); + else if(!strcmp(arg.function, "rotmg_strided_batched")) + testing_rotmg_strided_batched(arg); else if(!strcmp(arg.function, "gemv")) testing_gemv(arg); else if(!strcmp(arg.function, "gemv_batched")) @@ -326,6 +338,47 @@ struct perf_blas +struct perf_blas_rot : rocblas_test_invalid +{ +}; + +template +struct perf_blas_rot< + Ti, + To, + Tc, + typename std::enable_if<( + (std::is_same{} && std::is_same{} && std::is_same{}) + || (std::is_same{} && std::is_same{} && std::is_same{}) + || (std::is_same{} && std::is_same{} + && std::is_same{}) + || (std::is_same{} && std::is_same{} + && std::is_same{}) + || (std::is_same{} && std::is_same{} + && std::is_same{}) + || (std::is_same{} && std::is_same{} + && std::is_same{}))>::type> +{ + explicit operator bool() + { + return true; + } + + void operator()(const Arguments& arg) + { + if(!strcmp(arg.function, "rot")) + testing_rot(arg); + else if(!strcmp(arg.function, "rot_batched")) + testing_rot_batched(arg); + else if(!strcmp(arg.function, "rot_strided_batched")) + testing_rot_strided_batched(arg); + else + throw std::invalid_argument("Invalid combination --function "s + arg.function + + " --a_type "s + rocblas_datatype2string(arg.a_type)); + } +}; + template struct perf_blas_scal : rocblas_test_invalid { @@ -361,6 +414,40 @@ struct perf_blas_scal< } }; +template +struct perf_blas_rotg : rocblas_test_invalid +{ +}; + +template +struct perf_blas_rotg< + Ta, + Tb, + typename std::enable_if< + (std::is_same{} && std::is_same{}) + || (std::is_same{} && std::is_same{}) + || 
(std::is_same{} && std::is_same{}) + || (std::is_same{} && std::is_same{})>::type> +{ + explicit operator bool() + { + return true; + } + void operator()(const Arguments& arg) + { + if(!strcmp(arg.function, "rotg")) + testing_rotg(arg); + else if(!strcmp(arg.function, "rotg_batched")) + testing_rotg_batched(arg); + else if(!strcmp(arg.function, "rotg_strided_batched")) + testing_rotg_strided_batched(arg); + else + throw std::invalid_argument("Invalid combination --function "s + arg.function + + " --a_type " + rocblas_datatype2string(arg.a_type) + + " --b_type " + rocblas_datatype2string(arg.b_type)); + } +}; + int run_bench_test(Arguments& arg) { // disable unit_check in client benchmark, it is only used in gtest unit test @@ -523,6 +610,12 @@ int run_bench_test(Arguments& arg) if(!strcmp(function, "scal") || !strcmp(function, "scal_batched") || !strcmp(function, "scal_strided_batched")) rocblas_blas1_dispatch(arg); + else if(!strcmp(function, "rotg") || !strcmp(function, "rotg_batched") + || !strcmp(function, "rotg_strided_batched")) + rocblas_blas1_dispatch(arg); + else if(!strcmp(function, "rot") || !strcmp(function, "rot_batched") + || !strcmp(function, "rot_strided_batched")) + rocblas_blas1_dispatch(arg); else rocblas_simple_dispatch(arg); } diff --git a/clients/common/rocblas_gentest.py b/clients/common/rocblas_gentest.py index 383eef4e5..4f3aefadf 100755 --- a/clients/common/rocblas_gentest.py +++ b/clients/common/rocblas_gentest.py @@ -199,21 +199,53 @@ def setdefaults(test): if test['function'] in ('asum_strided_batched', 'nrm2_strided_batched', 'scal_strided_batched', 'swap_strided_batched', 'copy_strided_batched', 'dot_strided_batched', - 'dotc_strided_batched'): + 'dotc_strided_batched', 'rot_strided_batched', + 'rotm_strided_batched'): if all([x in test for x in ('N', 'incx', 'stride_scale')]): test.setdefault('stride_x', int(test['N'] * abs(test['incx']) * test['stride_scale'])) if all([x in test for x in ('N', 'incy', 'stride_scale')]): 
test.setdefault('stride_y', int(test['N'] * abs(test['incy']) * test['stride_scale'])) - - if test['function'] in ('ger_strided_batched'): - if all([x in test for x in ('M', 'incx', 'stride_scale')]): - test.setdefault('stride_x', int(test['M'] * abs(test['incx']) * - test['stride_scale'])) - if all([x in test for x in ('N', 'incy', 'stride_scale')]): - test.setdefault('stride_y', int(test['N'] * abs(test['incy']) * - test['stride_scale'])) + # we are using stride_c for param in rotm + if 'stride_scale' in test: + test.setdefault('stride_c', int(test['stride_scale']) * 5) + + if test['function'] in ('gemv_strided_batched', 'ger_strided_batched'): + if test['function'] == 'ger_strided_batched' or test['transA'] in ('T', 'C'): + if all([x in test for x in ('M', 'incx', 'stride_scale')]): + test.setdefault('stride_x', int(test['M'] * abs(test['incx']) * + test['stride_scale'])) + if all([x in test for x in ('N', 'incy', 'stride_scale')]): + test.setdefault('stride_y', int(test['N'] * abs(test['incy']) * + test['stride_scale'])) + else: + if all([x in test for x in ('N', 'incx', 'stride_scale')]): + test.setdefault('stride_x', int(test['N'] * abs(test['incx']) * + test['stride_scale'])) + if all([x in test for x in ('M', 'incy', 'stride_scale')]): + test.setdefault('stride_y', int(test['M'] * abs(test['incy']) * + test['stride_scale'])) + + # we are using stride_c for arg c and stride_d for arg s in rotg + # these are single values for each batch + if test['function'] == 'rotg_strided_batched': + if 'stride_scale' in test: + test.setdefault('stride_a', int(test['stride_scale'])) + test.setdefault('stride_b', int(test['stride_scale'])) + test.setdefault('stride_c', int(test['stride_scale'])) + test.setdefault('stride_d', int(test['stride_scale'])) + + # we are using stride_a for d1, stride_b for d2, and stride_c for param in + # rotmg. 
These are single values for each batch, except param which is + # a 5 element array + if test['function'] == 'rotmg_strided_batched': + if 'stride_scale' in test: + test.setdefault('stride_a', int(test['stride_scale'])) + test.setdefault('stride_b', int(test['stride_scale'])) + test.setdefault('stride_c', int(test['stride_scale']) * 5) + test.setdefault('stride_x', int(test['stride_scale'])) + test.setdefault('stride_y', int(test['stride_scale'])) test.setdefault('stride_x', 0) test.setdefault('stride_y', 0) diff --git a/clients/gtest/CMakeLists.txt b/clients/gtest/CMakeLists.txt index 9281e5a99..ead4e869e 100644 --- a/clients/gtest/CMakeLists.txt +++ b/clients/gtest/CMakeLists.txt @@ -179,7 +179,7 @@ endif( ) set( ROCBLAS_TEST_DATA "${PROJECT_BINARY_DIR}/staging/rocblas_gtest.data") add_custom_command( OUTPUT "${ROCBLAS_TEST_DATA}" COMMAND ../common/rocblas_gentest.py -I ../include rocblas_gtest.yaml -o "${ROCBLAS_TEST_DATA}" - DEPENDS ../common/rocblas_gentest.py rocblas_gtest.yaml ../include/rocblas_common.yaml known_bugs.yaml blas1_gtest.yaml gemm_gtest.yaml gemm_batched_gtest.yaml gemm_strided_batched_gtest.yaml gemv_gtest.yaml gemv_batched_gtest.yaml gemv_strided_batched_gtest.yaml symv_gtest.yaml syr_gtest.yaml ger_gtest.yaml trsm_gtest.yaml trtri_gtest.yaml geam_gtest.yaml set_get_vector_gtest.yaml set_get_matrix_gtest.yaml trsv_gtest.yaml logging_mode_gtest.yaml set_get_pointer_mode_gtest.yaml + DEPENDS ../common/rocblas_gentest.py rocblas_gtest.yaml ../include/rocblas_common.yaml known_bugs.yaml blas1_gtest.yaml gemm_gtest.yaml gemm_batched_gtest.yaml gemm_strided_batched_gtest.yaml gemv_gtest.yaml symv_gtest.yaml syr_gtest.yaml ger_gtest.yaml trsm_gtest.yaml trtri_gtest.yaml geam_gtest.yaml set_get_vector_gtest.yaml set_get_matrix_gtest.yaml trsv_gtest.yaml logging_mode_gtest.yaml set_get_pointer_mode_gtest.yaml WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" ) add_custom_target( rocblas-test-data DEPENDS "${ROCBLAS_TEST_DATA}" ) diff --git 
a/clients/gtest/blas1_gtest.cpp b/clients/gtest/blas1_gtest.cpp index a5b9536f4..f2a1e7ca1 100644 --- a/clients/gtest/blas1_gtest.cpp +++ b/clients/gtest/blas1_gtest.cpp @@ -18,9 +18,17 @@ #include "testing_nrm2_batched.hpp" #include "testing_nrm2_strided_batched.hpp" #include "testing_rot.hpp" +#include "testing_rot_batched.hpp" +#include "testing_rot_strided_batched.hpp" #include "testing_rotg.hpp" +#include "testing_rotg_batched.hpp" +#include "testing_rotg_strided_batched.hpp" #include "testing_rotm.hpp" +#include "testing_rotm_batched.hpp" +#include "testing_rotm_strided_batched.hpp" #include "testing_rotmg.hpp" +#include "testing_rotmg_batched.hpp" +#include "testing_rotmg_strided_batched.hpp" #include "testing_scal.hpp" #include "testing_scal_batched.hpp" #include "testing_scal_strided_batched.hpp" @@ -59,9 +67,17 @@ namespace swap_batched, swap_strided_batched, rot, + rot_batched, + rot_strided_batched, rotg, + rotg_batched, + rotg_strided_batched, rotm, + rotm_batched, + rotm_strided_batched, rotmg, + rotmg_batched, + rotmg_strided_batched, }; // ---------------------------------------------------------------------------- @@ -93,32 +109,45 @@ namespace { bool is_scal = (BLAS1 == blas1::scal || BLAS1 == blas1::scal_batched || BLAS1 == blas1::scal_strided_batched); + bool is_rot = (BLAS1 == blas1::rot || BLAS1 == blas1::rot_batched + || BLAS1 == blas1::rot_strided_batched); + bool is_rotg = (BLAS1 == blas1::rotg || BLAS1 == blas1::rotg_batched + || BLAS1 == blas1::rotg_strided_batched); + bool is_rotmg = (BLAS1 == blas1::rotmg || BLAS1 == blas1::rotmg_batched + || BLAS1 == blas1::rotmg_strided_batched); bool is_batched = (BLAS1 == blas1::nrm2_batched || BLAS1 == blas1::asum_batched || BLAS1 == blas1::scal_batched || BLAS1 == blas1::swap_batched || BLAS1 == blas1::copy_batched || BLAS1 == blas1::dot_batched - || BLAS1 == blas1::dotc_batched); + || BLAS1 == blas1::dotc_batched || BLAS1 == blas1::rot_batched + || BLAS1 == blas1::rotm_batched || BLAS1 == 
blas1::rotg_batched + || BLAS1 == blas1::rotmg_batched); bool is_strided = (BLAS1 == blas1::nrm2_strided_batched || BLAS1 == blas1::asum_strided_batched || BLAS1 == blas1::scal_strided_batched || BLAS1 == blas1::swap_strided_batched || BLAS1 == blas1::copy_strided_batched || BLAS1 == blas1::dot_strided_batched - || BLAS1 == blas1::dotc_strided_batched); + || BLAS1 == blas1::dotc_strided_batched + || BLAS1 == blas1::rot_strided_batched + || BLAS1 == blas1::rotm_strided_batched + || BLAS1 == blas1::rotg_strided_batched + || BLAS1 == blas1::rotmg_strided_batched); - if((is_scal || BLAS1 == blas1::rot || BLAS1 == blas1::rotg) - && arg.a_type != arg.b_type) + if((is_scal || is_rotg || is_rot) && arg.a_type != arg.b_type) name << '_' << rocblas_datatype2string(arg.b_type); - if(BLAS1 == blas1::rot && arg.compute_type != arg.a_type) + if(is_rot && arg.compute_type != arg.a_type) name << '_' << rocblas_datatype2string(arg.compute_type); - name << '_' << arg.N; + if(!is_rotg && !is_rotmg) + name << '_' << arg.N; if(BLAS1 == blas1::axpy || is_scal) name << '_' << arg.alpha << "_" << arg.alphai; - name << '_' << arg.incx; + if(!is_rotg && !is_rotmg) + name << '_' << arg.incx; - if(is_strided) + if(is_strided && !is_rotg) { name << '_' << arg.stride_x; } @@ -129,17 +158,31 @@ namespace || BLAS1 == blas1::dotc_batched || BLAS1 == blas1::dot_strided_batched || BLAS1 == blas1::dotc_strided_batched || BLAS1 == blas1::swap || BLAS1 == blas1::swap_batched || BLAS1 == blas1::swap_strided_batched - || BLAS1 == blas1::rot || BLAS1 == blas1::rotm) + || BLAS1 == blas1::rot || BLAS1 == blas1::rot_batched + || BLAS1 == blas1::rot_strided_batched || BLAS1 == blas1::rotm + || BLAS1 == blas1::rotm_batched || BLAS1 == blas1::rotm_strided_batched) { name << '_' << arg.incy; } if(BLAS1 == blas1::swap_strided_batched || BLAS1 == blas1::copy_strided_batched - || BLAS1 == blas1::dot_strided_batched || BLAS1 == blas1::dotc_strided_batched) + || BLAS1 == blas1::dot_strided_batched || BLAS1 == 
blas1::dotc_strided_batched + || BLAS1 == blas1::rot_strided_batched || BLAS1 == blas1::rotm_strided_batched) { name << '_' << arg.stride_y; } + if(BLAS1 == blas1::rotg_strided_batched) + { + name << '_' << arg.stride_a << '_' << arg.stride_b << '_' << arg.stride_c << '_' + << arg.stride_d; + } + + if(BLAS1 == blas1::rotm_strided_batched || BLAS1 == blas1::rotmg_strided_batched) + { + name << '_' << arg.stride_c; + } + if(is_batched || is_strided) { name << "_" << arg.batch_count; @@ -220,7 +263,8 @@ namespace || std::is_same{} || std::is_same{})) - || (BLAS1 == blas1::rot + || ((BLAS1 == blas1::rot || BLAS1 == blas1::rot_batched + || BLAS1 == blas1::rot_strided_batched) && ((std::is_same{} && std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{} && std::is_same{}) @@ -233,16 +277,22 @@ namespace || (std::is_same{} && std::is_same{} && std::is_same{}))) - || (BLAS1 == blas1::rotg && std::is_same{} + || ((BLAS1 == blas1::rotg || BLAS1 == blas1::rotg_batched + || BLAS1 == blas1::rotg_strided_batched) + && std::is_same{} && ((std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}) || (std::is_same{} && std::is_same{}))) - || (BLAS1 == blas1::rotm && std::is_same{} && std::is_same{} + || ((BLAS1 == blas1::rotm || BLAS1 == blas1::rotm_batched + || BLAS1 == blas1::rotm_strided_batched) + && std::is_same{} && std::is_same{} && (std::is_same{} || std::is_same{})) - || (BLAS1 == blas1::rotmg && std::is_same{} && std::is_same{} + || ((BLAS1 == blas1::rotmg || BLAS1 == blas1::rotmg_batched + || BLAS1 == blas1::rotmg_strided_batched) + && std::is_same{} && std::is_same{} && (std::is_same{} || std::is_same{}))>; // Creates tests for one of the BLAS 1 functions @@ -291,39 +341,47 @@ TEST_P(NAME, blas1) \ \ INSTANTIATE_TEST_CATEGORIES(NAME) - // clang-format on - #define ARG1(Ti, To, Tc) Ti #define ARG2(Ti, To, Tc) Ti, To #define ARG3(Ti, To, Tc) Ti, To, Tc - BLAS1_TESTING(asum, ARG1) - 
BLAS1_TESTING(asum_batched, ARG1) - BLAS1_TESTING(asum_strided_batched, ARG1) - BLAS1_TESTING(nrm2, ARG1) - BLAS1_TESTING(nrm2_batched, ARG1) - BLAS1_TESTING(nrm2_strided_batched, ARG1) - BLAS1_TESTING(iamax, ARG1) - BLAS1_TESTING(iamin, ARG1) - BLAS1_TESTING(axpy, ARG1) - BLAS1_TESTING(copy, ARG1) - BLAS1_TESTING(copy_batched, ARG1) - BLAS1_TESTING(copy_strided_batched, ARG1) - BLAS1_TESTING(dot, ARG1) - BLAS1_TESTING(dotc, ARG1) - BLAS1_TESTING(dot_batched, ARG1) - BLAS1_TESTING(dotc_batched, ARG1) - BLAS1_TESTING(dot_strided_batched, ARG1) - BLAS1_TESTING(dotc_strided_batched, ARG1) - BLAS1_TESTING(scal, ARG2) - BLAS1_TESTING(scal_batched, ARG2) - BLAS1_TESTING(scal_strided_batched, ARG2) - BLAS1_TESTING(swap, ARG1) - BLAS1_TESTING(swap_batched, ARG1) - BLAS1_TESTING(swap_strided_batched, ARG1) - BLAS1_TESTING(rot, ARG3) - BLAS1_TESTING(rotg, ARG2) - BLAS1_TESTING(rotm, ARG1) - BLAS1_TESTING(rotmg, ARG1) +BLAS1_TESTING(asum, ARG1) +BLAS1_TESTING(asum_batched, ARG1) +BLAS1_TESTING(asum_strided_batched, ARG1) +BLAS1_TESTING(nrm2, ARG1) +BLAS1_TESTING(nrm2_batched, ARG1) +BLAS1_TESTING(nrm2_strided_batched, ARG1) +BLAS1_TESTING(iamax, ARG1) +BLAS1_TESTING(iamin, ARG1) +BLAS1_TESTING(axpy, ARG1) +BLAS1_TESTING(copy, ARG1) +BLAS1_TESTING(copy_batched, ARG1) +BLAS1_TESTING(copy_strided_batched, ARG1) +BLAS1_TESTING(dot, ARG1) +BLAS1_TESTING(dotc, ARG1) +BLAS1_TESTING(dot_batched, ARG1) +BLAS1_TESTING(dotc_batched, ARG1) +BLAS1_TESTING(dot_strided_batched, ARG1) +BLAS1_TESTING(dotc_strided_batched, ARG1) +BLAS1_TESTING(scal, ARG2) +BLAS1_TESTING(scal_batched, ARG2) +BLAS1_TESTING(scal_strided_batched, ARG2) +BLAS1_TESTING(swap, ARG1) +BLAS1_TESTING(swap_batched, ARG1) +BLAS1_TESTING(swap_strided_batched, ARG1) +BLAS1_TESTING(rot, ARG3) +BLAS1_TESTING(rot_batched, ARG3) +BLAS1_TESTING(rot_strided_batched, ARG3) +BLAS1_TESTING(rotg, ARG2) +BLAS1_TESTING(rotg_batched, ARG2) +BLAS1_TESTING(rotg_strided_batched, ARG2) +BLAS1_TESTING(rotm, ARG1) +BLAS1_TESTING(rotm_batched, 
ARG1) +BLAS1_TESTING(rotm_strided_batched, ARG1) +BLAS1_TESTING(rotmg, ARG1) +BLAS1_TESTING(rotmg_batched, ARG1) +BLAS1_TESTING(rotmg_strided_batched, ARG1) + + // clang-format on } // namespace diff --git a/clients/gtest/blas1_gtest.yaml b/clients/gtest/blas1_gtest.yaml index 6b474eb31..65d92fa6d 100644 --- a/clients/gtest/blas1_gtest.yaml +++ b/clients/gtest/blas1_gtest.yaml @@ -46,6 +46,21 @@ Tests: - rotg: *rotg_precisions - rotmg: *single_double_precisions_complex_real + - name: blas1_batched + category: quick + batch_count: [-1, 0, 5] + function: + - rotg_batched: *rotg_precisions + - rotmg_batched: *single_double_precisions_complex_real + + - name: blas1_strided_batched + category: quick + batch_count: [-1, 0, 5] + stride_scale: [ 1.5 ] + function: + - rotg_strided_batched: *rotg_precisions + - rotmg_strided_batched: *single_double_precisions_complex_real + # All functions with alpha and incx and incy # quick @@ -313,6 +328,8 @@ Tests: function: - swap_batched: *single_double_precisions_complex_real - copy_batched: *single_double_precisions_complex_real + - rot_batched: *rot_precisions + - rotm_batched: *single_double_precisions_complex_real - name: blas1_strided_batched category: quick @@ -324,6 +341,8 @@ Tests: function: - swap_strided_batched: *single_double_precisions_complex_real - copy_strided_batched: *single_double_precisions_complex_real + - rot_strided_batched: *rot_precisions + - rotm_strided_batched: *single_double_precisions_complex_real # pre_checkin - name: blas1 @@ -346,6 +365,8 @@ Tests: function: - swap_batched: *single_double_precisions_complex_real - copy_batched: *single_double_precisions_complex_real + - rot_batched: *rot_precisions + - rotm_batched: *single_double_precisions_complex_real - name: blas1_strided_batched category: pre_checkin @@ -356,6 +377,8 @@ Tests: function: - swap_strided_batched: *single_double_precisions_complex_real - copy_strided_batched: *single_double_precisions_complex_real + - rot_strided_batched: 
*rot_precisions + - rotm_strided_batched: *single_double_precisions_complex_real # nightly - name: blas1 @@ -378,6 +401,8 @@ Tests: function: - swap_batched: *single_double_precisions_complex_real - copy_batched: *single_double_precisions_complex_real + - rot_batched: *rot_precisions + - rotm_batched: *single_double_precisions_complex_real - name: blas1_strided_batched category: nightly @@ -388,6 +413,8 @@ Tests: function: - swap_strided_batched: *single_double_precisions_complex_real - copy_strided_batched: *single_double_precisions_complex_real + - rot_strided_batched: *rot_precisions + - rotm_strided_batched: *single_double_precisions_complex_real # all functions bad arg # for bad_arg no arguments should be used by test code @@ -421,6 +448,14 @@ Tests: - rotg_bad_arg: *rotg_precisions - rotm_bad_arg: *single_double_precisions_complex_real - rotmg_bad_arg: *single_double_precisions_complex_real + - rot_batched_bad_arg: *rot_precisions + - rotg_batched_bad_arg: *rotg_precisions + - rotm_batched_bad_arg: *single_double_precisions_complex_real + - rotmg_batched_bad_arg: *single_double_precisions_complex_real + - rot_strided_batched_bad_arg: *rot_precisions + - rotg_strided_batched_bad_arg: *rotg_precisions + - rotm_strided_batched_bad_arg: *single_double_precisions_complex_real + - rotmg_strided_batched_bad_arg: *single_double_precisions_complex_real ... 
diff --git a/clients/gtest/gemv_batched_gtest.yaml b/clients/gtest/gemv_batched_gtest.yaml deleted file mode 100644 index e0eeedd3d..000000000 --- a/clients/gtest/gemv_batched_gtest.yaml +++ /dev/null @@ -1,91 +0,0 @@ ---- -include: rocblas_common.yaml -include: known_bugs.yaml - -Definitions: - - &small_matrix_size_range - - { M: -1, N: 1, lda: 1 } - - { M: 1, N: -1, lda: 1 } - - { M: 1, N: 1, lda: 0 } - - { M: 10, N: 10, lda: 9 } - - { M: 0, N: 1, lda: 1 } - - { M: 1, N: 0, lda: 1 } - - { M: -1, N: -1, lda: -1 } - - { M: 10, N: 10, lda: 2 } - - { M: 100, N: 200, lda: 200 } - - - &medium_matrix_size_range - - { M: 300, N: 400, lda: 400 } - - { M: 600, N: 500, lda: 601 } - - - &large_matrix_size_range - - { M: 1000, N: 1000, lda: 1000 } - - { M: 2000, N: 2000, lda: 2000 } - - { M: 4011, N: 4011, lda: 4011 } - - { M: 8000, N: 8000, lda: 8000 } - - - &incx_incy_range - - { incx: 2, incy: 1 } - - { incx: -1, incy: 2 } - - { incx: 1, incy: 1 } - - { incx: -1, incy: 3 } - - { incx: 3, incy: -1 } - - { incx: 0, incy: 1 } - - { incx: 1, incy: 0 } - - { incx: 0, incy: -1 } - - { incx: 10, incy: 100 } - - - &alpha_beta_range - - { alpha: 2.0, beta: 0.0 } - - { alpha: -1.0, beta: -1.0 } - - { alpha: 2.0, beta: 1.0 } - - { alpha: 0.0, beta: 1.0 } - -Tests: -- name: gemv_batched_bad_arg - category: pre_checkin - function: gemv_batched_bad_arg - precision: *single_double_precisions - transA: N - -- name: gemv_batched_NaN - category: pre_checkin - function: gemv_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *medium_matrix_size_range - incx_incy: *incx_incy_range - alpha: [ -1.0, 0, 1.0, 2.0 ] - beta: .NaN # converted to 0.0 in test code - batch_count: [ -1, 0, 1, 3 ] - -- name: gemv_batched_small - category: quick - function: gemv_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *small_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ -1, 0, 1, 3 ] - -- name: 
gemv_batched_medium - category: pre_checkin - function: gemv_batched - precision: *single_double_precisions_complex_real - transA: [ N, T, C ] - matrix_size: *medium_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ 3 ] - -- name: gemv_batched_large - category: nightly - function: gemv_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *large_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ 3 ] -... diff --git a/clients/gtest/gemv_gtest.yaml b/clients/gtest/gemv_gtest.yaml index 9dda01aee..db8b6ea6f 100644 --- a/clients/gtest/gemv_gtest.yaml +++ b/clients/gtest/gemv_gtest.yaml @@ -4,36 +4,36 @@ include: known_bugs.yaml Definitions: - &small_matrix_size_range - - { M: -1, N: 1, lda: 1 } - - { M: 1, N: -1, lda: 1 } - - { M: 1, N: 1, lda: 0 } - - { M: 10, N: 10, lda: 9 } - - { M: 0, N: 1, lda: 1 } - - { M: 1, N: 0, lda: 1 } - - { M: -1, N: -1, lda: -1 } - - { M: 10, N: 10, lda: 2 } - - { M: 100, N: 200, lda: 200 } + - { M: -1, N: 1, lda: 1, stride_a: 1 } + - { M: 1, N: -1, lda: 1, stride_a: 1 } + - { M: 1, N: 1, lda: 0, stride_a: 1 } + - { M: 10, N: 10, lda: 9, stride_a: 1 } + - { M: 0, N: 1, lda: 1, stride_a: 1 } + - { M: 1, N: 0, lda: 1, stride_a: 1 } + - { M: -1, N: -1, lda: -1, stride_a: 1 } + - { M: 10, N: 10, lda: 2, stride_a: 1 } + - { M: 100, N: 200, lda: 200, stride_a: 40000 } - &medium_matrix_size_range - - { M: 300, N: 400, lda: 400 } - - { M: 600, N: 500, lda: 601 } + - { M: 300, N: 400, lda: 400, stride_a: 160000 } + - { M: 600, N: 500, lda: 601, stride_a: 301000 } - &large_matrix_size_range - - { M: 1000, N: 1000, lda: 1000 } - - { M: 2000, N: 2000, lda: 2000 } - - { M: 4011, N: 4011, lda: 4011 } - - { M: 8000, N: 8000, lda: 8000 } + - { M: 1000, N: 1000, lda: 1000, stride_a: 1000000 } + - { M: 2000, N: 2000, lda: 2000, stride_a: 4000000 } + - { M: 4011, N: 4011, lda: 4011, stride_a: 16088200 } + - { M: 8000, N: 8000, lda: 8000, 
stride_a: 64000000 } - &incx_incy_range - - { incx: 2, incy: 1 } - - { incx: -1, incy: 2 } - - { incx: 1, incy: 1 } - - { incx: -1, incy: 3 } - - { incx: 3, incy: -1 } - - { incx: 0, incy: 1 } - - { incx: 1, incy: 0 } - - { incx: 0, incy: -1 } - - { incx: 10, incy: 100 } + - { incx: 2, incy: 1, stride_scale: 1 } + - { incx: -1, incy: 2, stride_scale: 1 } + - { incx: 1, incy: 1, stride_scale: 1 } + - { incx: -1, incy: 3, stride_scale: 1.5 } + - { incx: 3, incy: -1, stride_scale: 1 } + - { incx: 0, incy: 1, stride_scale: 1 } + - { incx: 1, incy: 0, stride_scale: 1 } + - { incx: 0, incy: -1, stride_scale: 2 } + - { incx: 10, incy: 100, stride_scale: 1 } - &alpha_beta_range - { alpha: 2.0, beta: 0.0, alphai: 1.5, betai: 0.5 } @@ -84,4 +84,98 @@ Tests: matrix_size: *large_matrix_size_range incx_incy: *incx_incy_range alpha_beta: *alpha_beta_range + +- name: gemv_batched_bad_arg + category: pre_checkin + function: gemv_batched_bad_arg + precision: *single_double_precisions + transA: N + +- name: gemv_batched_NaN + category: pre_checkin + function: gemv_batched + precision: *single_double_precisions + transA: [ N, T, C ] + matrix_size: *medium_matrix_size_range + incx_incy: *incx_incy_range + alpha: [ -1.0, 0, 1.0, 2.0 ] + beta: .NaN # converted to 0.0 in test code + batch_count: [ -1, 0, 1, 3 ] + +- name: gemv_batched_small + category: quick + function: gemv_batched + precision: *single_double_precisions + transA: [ N, T, C ] + matrix_size: *small_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ -1, 0, 1, 3 ] + +- name: gemv_batched_medium + category: pre_checkin + function: gemv_batched + precision: *single_double_precisions_complex_real + transA: [ N, T, C ] + matrix_size: *medium_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ 3 ] + +- name: gemv_batched_large + category: nightly + function: gemv_batched + precision: *single_double_precisions + transA: [ N, T, C ] + 
matrix_size: *large_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ 3 ] + +- name: gemv_strided_batched_bad_arg + category: pre_checkin + function: gemv_strided_batched_bad_arg + precision: *single_double_precisions + transA: N + +- name: gemv_strided_batched_NaN + category: pre_checkin + function: gemv_strided_batched + precision: *single_double_precisions + transA: [ N, T, C ] + matrix_size: *medium_matrix_size_range + incx_incy: *incx_incy_range + alpha: [ -1.0, 0, 1.0, 2.0 ] + beta: .NaN # converted to 0.0 in test code + batch_count: [ -1, 0, 1, 3 ] + +- name: gemv_strided_batched_small + category: quick + function: gemv_strided_batched + precision: *single_double_precisions + transA: [ N, T, C ] + matrix_size: *small_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ -1, 0, 1, 3 ] + +- name: gemv_strided_batched_medium + category: pre_checkin + function: gemv_strided_batched + precision: *single_double_precisions_complex_real + transA: [ N, T, C ] + matrix_size: *medium_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ 3 ] + +- name: gemv_strided_batched_large + category: nightly + function: gemv_strided_batched + precision: *single_double_precisions + transA: [ N, T, C ] + matrix_size: *large_matrix_size_range + incx_incy: *incx_incy_range + alpha_beta: *alpha_beta_range + batch_count: [ 3 ] ... 
diff --git a/clients/gtest/gemv_strided_batched_gtest.yaml b/clients/gtest/gemv_strided_batched_gtest.yaml deleted file mode 100644 index 7a4ddbd2e..000000000 --- a/clients/gtest/gemv_strided_batched_gtest.yaml +++ /dev/null @@ -1,91 +0,0 @@ ---- -include: rocblas_common.yaml -include: known_bugs.yaml - -Definitions: - - &small_matrix_size_range - - { M: -1, N: 1, lda: 1, stride_a: 1 } - - { M: 1, N: -1, lda: 1, stride_a: 1 } - - { M: 1, N: 1, lda: 0, stride_a: 1 } - - { M: 10, N: 10, lda: 9, stride_a: 1 } - - { M: 0, N: 1, lda: 1, stride_a: 1 } - - { M: 1, N: 0, lda: 1, stride_a: 1 } - - { M: -1, N: -1, lda: -1, stride_a: 1 } - - { M: 10, N: 10, lda: 2, stride_a: 1 } - - { M: 100, N: 200, lda: 200, stride_a: 40000 } - - - &medium_matrix_size_range - - { M: 300, N: 400, lda: 400, stride_a: 160000 } - - { M: 600, N: 500, lda: 601, stride_a: 301000 } - - - &large_matrix_size_range - - { M: 1000, N: 1000, lda: 1000, stride_a: 1000000 } - - { M: 2000, N: 2000, lda: 2000, stride_a: 4000000 } - - { M: 4011, N: 4011, lda: 4011, stride_a: 16088200 } - - { M: 8000, N: 8000, lda: 8000, stride_a: 64000000 } - - - &incx_incy_range - - { incx: 2, incy: 1, stride_x: 8000, stride_y: 8000 } - - { incx: -1, incy: 2, stride_x: 8000, stride_y: 8000 } - - { incx: 1, incy: 1, stride_x: 8000, stride_y: 8000 } - - { incx: -1, incy: 3, stride_x: 4000, stride_y: 4000 } - - { incx: 3, incy: -1, stride_x: 2000, stride_y: 2000 } - - { incx: 0, incy: 1, stride_x: 1000, stride_y: 1000 } - - { incx: 1, incy: 0, stride_x: 1000, stride_y: 1000 } - - { incx: 0, incy: -1, stride_x: 1, stride_y: 1 } - - { incx: 10, incy: 100, stride_x: 8000, stride_y: 8000 } - - - &alpha_beta_range - - { alpha: 2.0, beta: 0.0 } - - { alpha: -1.0, beta: -1.0 } - - { alpha: 2.0, beta: 1.0 } - - { alpha: 0.0, beta: 1.0 } - -Tests: -- name: gemv_strided_batched_bad_arg - category: pre_checkin - function: gemv_strided_batched_bad_arg - precision: *single_double_precisions - transA: N - -- name: gemv_strided_batched_NaN - 
category: pre_checkin - function: gemv_strided_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *medium_matrix_size_range - incx_incy: *incx_incy_range - alpha: [ -1.0, 0, 1.0, 2.0 ] - beta: .NaN # converted to 0.0 in test code - batch_count: [ -1, 0, 1, 3 ] - -- name: gemv_strided_batched_small - category: quick - function: gemv_strided_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *small_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ -1, 0, 1, 3 ] - -- name: gemv_strided_batched_medium - category: pre_checkin - function: gemv_strided_batched - precision: *single_double_precisions_complex_real - transA: [ N, T, C ] - matrix_size: *medium_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ 3 ] - -- name: gemv_strided_batched_large - category: nightly - function: gemv_strided_batched - precision: *single_double_precisions - transA: [ N, T, C ] - matrix_size: *large_matrix_size_range - incx_incy: *incx_incy_range - alpha_beta: *alpha_beta_range - batch_count: [ 3 ] -... 
diff --git a/clients/gtest/ger_gtest.cpp b/clients/gtest/ger_gtest.cpp index 6154bc04d..1b9f3294a 100644 --- a/clients/gtest/ger_gtest.cpp +++ b/clients/gtest/ger_gtest.cpp @@ -55,24 +55,32 @@ namespace { RocBLAS_TestName name; - name << rocblas_datatype2string(arg.a_type) << '_' << arg.M << '_' << arg.N << '_' - << arg.alpha << '_' << arg.incx; + name << rocblas_datatype2string(arg.a_type); - if(GER_TYPE == GER_STRIDED_BATCHED) - name << '_' << arg.stride_x; + if(strstr(arg.function, "_bad_arg") != nullptr) + { + name << "_bad_arg"; + } + else + { + name << '_' << arg.M << '_' << arg.N << '_' << arg.alpha << '_' << arg.incx; + + if(GER_TYPE == GER_STRIDED_BATCHED) + name << '_' << arg.stride_x; - name << '_' << arg.incy; + name << '_' << arg.incy; - if(GER_TYPE == GER_STRIDED_BATCHED) - name << '_' << arg.stride_y; + if(GER_TYPE == GER_STRIDED_BATCHED) + name << '_' << arg.stride_y; - name << '_' << arg.lda; + name << '_' << arg.lda; - if(GER_TYPE == GER_STRIDED_BATCHED) - name << '_' << arg.stride_a; + if(GER_TYPE == GER_STRIDED_BATCHED) + name << '_' << arg.stride_a; - if(GER_TYPE == GER_STRIDED_BATCHED || GER_TYPE == GER_BATCHED) - name << '_' << arg.batch_count; + if(GER_TYPE == GER_STRIDED_BATCHED || GER_TYPE == GER_BATCHED) + name << '_' << arg.batch_count; + } return std::move(name); } diff --git a/clients/gtest/ger_gtest.yaml b/clients/gtest/ger_gtest.yaml index a1ab5d422..7215ad2be 100644 --- a/clients/gtest/ger_gtest.yaml +++ b/clients/gtest/ger_gtest.yaml @@ -11,7 +11,7 @@ Definitions: - { M: 0, N: 1, lda: 1, stride_a: 1 } - { M: 1, N: 0, lda: 1, stride_a: 1 } - { M: 1, N: 1, lda: 0, stride_a: 1 } - - { M: 11, N: 12, lda: 13, stride_a: 1 } + - { M: 11, N: 12, lda: 13, stride_a: 156 } - { M: 16, N: 16, lda: 16, stride_a: 256 } - { M: 33, N: 32, lda: 33, stride_a: 1056 } - { M: 65, N: 65, lda: 66, stride_a: 4300 } @@ -46,7 +46,10 @@ Definitions: Tests: - name: ger_bad_arg category: pre_checkin - function: ger_bad_arg + function: + - ger_bad_arg + - 
ger_batched_bad_arg + - ger_strided_batched_bad_arg precision: *single_double_precisions - name: ger_small @@ -73,12 +76,6 @@ Tests: incx_incy: *incx_incy_range alpha: [ -0.5, 2.0, 0.0, 0.6 ] -- name: ger_batched_bad_arg - category: pre_checkin - function: ger_batched_bad_arg - precision: *single_double_precisions - batch_count: [ -5, 0, 1, 5, 10 ] - - name: ger_batched_small category: quick function: ger_batched @@ -106,13 +103,6 @@ Tests: alpha: [ -0.5, 2.0, 0.0 ] batch_count: [ 1, 3 ] -- name: ger_strided_batched_bad_arg - category: pre_checkin - function: ger_strided_batched_bad_arg - precision: *single_double_precisions - stride_scale: [ -1, 0, 0.5, 1, 2 ] - batch_count: [ -5, 0, 1, 5, 10 ] - - name: ger_strided_batched_small category: quick function: ger_strided_batched @@ -120,7 +110,7 @@ Tests: matrix_size: *small_matrix_size_range incx_incy: *incx_incy_range alpha: [ -0.5, 2.0, 0.0 ] - stride_scale: [ 0.5, 1, 2 ] + stride_scale: [ 1, 2 ] batch_count: [ -5, 0, 1, 5, 10 ] - name: ger_strided_batched_medium @@ -130,7 +120,7 @@ Tests: matrix_size: *medium_matrix_size_range incx_incy: *incx_incy_range alpha: [ -0.5, 2.0, 0.0 ] - stride_scale: [ 0.5, 1, 2 ] + stride_scale: [ 1, 2 ] batch_count: [ 1, 5, 10 ] - name: ger_strided_batched_large @@ -140,5 +130,6 @@ Tests: matrix_size: *large_matrix_size_range incx_incy: *nightly_incx_incy_range alpha: [ -0.5, 2.0, 0.0 ] + stride_scale: [ 1 ] batch_count: [ 1, 3 ] ... 
diff --git a/clients/gtest/known_bugs.yaml b/clients/gtest/known_bugs.yaml index fa7592d3f..5ce181dad 100644 --- a/clients/gtest/known_bugs.yaml +++ b/clients/gtest/known_bugs.yaml @@ -3,7 +3,13 @@ # Wildcards can be used for the function Known bugs: +- { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 5.0, alphai: 0.0, beta: 0.0, betai: 0.0 } +- { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 0.0, alphai: 0.0, beta: 3.0, betai: 0.0 } +- { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 1.0, alphai: 0.0, beta: 3.0, betai: 0.0 } +- { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 1.0, alphai: 0.0, beta: 1.0, betai: 0.0 } - { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 1024, N: 1024, K: 1024, lda: 1024, ldb: 1024, ldc: 1024, ldd: 1024, alpha: 5.0, alphai: 0.0, beta: 0.0, betai: 0.0 } - { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 1024, N: 1024, K: 1024, lda: 1024, ldb: 1024, ldc: 1024, ldd: 1024, alpha: 0.0, alphai: 0.0, beta: 3.0, betai: 0.0 } - { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 1024, N: 1024, K: 1024, 
lda: 1024, ldb: 1024, ldc: 1024, ldd: 1024, alpha: 1.0, alphai: 0.0, beta: 3.0, betai: 0.0 } - { function: "gemm_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 1024, N: 1024, K: 1024, lda: 1024, ldb: 1024, ldc: 1024, ldd: 1024, alpha: 1.0, alphai: 0.0, beta: 1.0, betai: 0.0 } +- { function: "gemm_strided_batched_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: -2.0, alphai: 0.0, beta: -3.0, betai: 0.0, batch_count: 3, stride_a: 262144, stride_b: 262144, stride_c: 262144, stride_d: 262144 } +- { function: "gemm_strided_batched_ex", a_type: "bf16_r", b_type: "bf16_r", c_type: "bf16_r", d_type: "bf16_r", compute_type: "f32_r", transA: 'C', transB: 'N', M: 512, N: 512, K: 512, lda: 512, ldb: 512, ldc: 512, ldd: 512, alpha: 0.0, alphai: 0.0, beta: 1.0, betai: 0.0, batch_count: 3, stride_a: 262144, stride_b: 262144, stride_c: 262144, stride_d: 262144 } diff --git a/clients/gtest/rocblas_gtest.yaml b/clients/gtest/rocblas_gtest.yaml index 3cbf2a1ac..f1db4f620 100644 --- a/clients/gtest/rocblas_gtest.yaml +++ b/clients/gtest/rocblas_gtest.yaml @@ -1,9 +1,7 @@ include: blas1_gtest.yaml include: gemv_gtest.yaml -include: gemv_batched_gtest.yaml -include: gemv_strided_batched_gtest.yaml include: gemm_gtest.yaml -include :gemm_batched_gtest.yaml +include: gemm_batched_gtest.yaml include: gemm_strided_batched_gtest.yaml include: symv_gtest.yaml include: syr_gtest.yaml diff --git a/clients/gtest/syr_gtest.cpp b/clients/gtest/syr_gtest.cpp index 5f80fa3ce..7b9b38acd 100644 --- a/clients/gtest/syr_gtest.cpp +++ b/clients/gtest/syr_gtest.cpp @@ -7,6 +7,8 @@ #include "rocblas_datatype2string.hpp" #include "rocblas_test.hpp" #include "testing_syr.hpp" +#include "testing_syr_batched.hpp" +#include "testing_syr_strided_batched.hpp" #include "type_dispatch.hpp" 
#include #include @@ -14,7 +16,73 @@ namespace { - // By default, this test does not apply to any types. + // possible test cases + enum syr_test_type + { + SYR, + SYR_BATCHED, + SYR_STRIDED_BATCHED, + }; + + //syr test template + template