Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 0 additions & 16 deletions clients/benchmarks/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,22 +554,12 @@ try
std::string compute_type;
std::string initialization;

#ifdef USE_TENSILE_HOST
std::string host_lib_path;
#endif

rocblas_int device_id;
bool datafile = rocblas_parse_data(argc, argv);

options_description desc("rocblas-bench command line options");
desc.add_options()
// clang-format off

#ifdef USE_TENSILE_HOST
("lib",
value<std::string>(&host_lib_path),
"Host libriary path")
#endif
("sizem,m",
value<rocblas_int>(&arg.M)->default_value(128),
"Specific matrix size: sizem is only applicable to BLAS-2 & BLAS-3: the number of "
Expand Down Expand Up @@ -805,12 +795,6 @@ try
if(copied <= 0 || copied >= sizeof(arg.function))
throw std::invalid_argument("Invalid value for --function");

#ifdef USE_TENSILE_HOST
int copied_host = snprintf(arg.host_lib_path, sizeof(arg.host_lib_path), "%s", host_lib_path.c_str());
if(copied_host <= 0 || copied_host >= sizeof(arg.host_lib_path))
throw std::invalid_argument("Invalid value for --lib");
#endif

return run_bench_test(arg);
}
catch(const std::invalid_argument& exp)
Expand Down
60 changes: 30 additions & 30 deletions clients/gtest/blas1_gtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,39 +291,39 @@ TEST_P(NAME, blas1) \
\
INSTANTIATE_TEST_CATEGORIES(NAME)

// clang-format on

// Argument selectors used as the second argument of BLAS1_TESTING.
// Each takes the triple (Ti, To, Tc) and expands to its leading entries:
//   ARG1 -> Ti
//   ARG2 -> Ti, To
//   ARG3 -> Ti, To, Tc
#define ARG1(Ti, To, Tc) Ti
#define ARG2(Ti, To, Tc) Ti, To
#define ARG3(Ti, To, Tc) Ti, To, Tc

BLAS1_TESTING(asum, ARG1)
BLAS1_TESTING(asum_batched, ARG1)
BLAS1_TESTING(asum_strided_batched, ARG1)
BLAS1_TESTING(nrm2, ARG1)
BLAS1_TESTING(nrm2_batched, ARG1)
BLAS1_TESTING(nrm2_strided_batched, ARG1)
BLAS1_TESTING(iamax, ARG1)
BLAS1_TESTING(iamin, ARG1)
BLAS1_TESTING(axpy, ARG1)
BLAS1_TESTING(copy, ARG1)
BLAS1_TESTING(copy_batched, ARG1)
BLAS1_TESTING(copy_strided_batched, ARG1)
BLAS1_TESTING(dot, ARG1)
BLAS1_TESTING(dotc, ARG1)
BLAS1_TESTING(dot_batched, ARG1)
BLAS1_TESTING(dotc_batched, ARG1)
BLAS1_TESTING(dot_strided_batched, ARG1)
BLAS1_TESTING(dotc_strided_batched, ARG1)
BLAS1_TESTING(scal, ARG2)
BLAS1_TESTING(scal_batched, ARG2)
BLAS1_TESTING(scal_strided_batched, ARG2)
BLAS1_TESTING(swap, ARG1)
BLAS1_TESTING(swap_batched, ARG1)
BLAS1_TESTING(swap_strided_batched, ARG1)
BLAS1_TESTING(rot, ARG3)
BLAS1_TESTING(rotg, ARG2)
BLAS1_TESTING(rotm, ARG1)
BLAS1_TESTING(rotmg, ARG1)

// clang-format on
BLAS1_TESTING(asum, ARG1)
BLAS1_TESTING(asum_batched, ARG1)
BLAS1_TESTING(asum_strided_batched, ARG1)
BLAS1_TESTING(nrm2, ARG1)
BLAS1_TESTING(nrm2_batched, ARG1)
BLAS1_TESTING(nrm2_strided_batched, ARG1)
BLAS1_TESTING(iamax, ARG1)
BLAS1_TESTING(iamin, ARG1)
BLAS1_TESTING(axpy, ARG1)
BLAS1_TESTING(copy, ARG1)
BLAS1_TESTING(copy_batched, ARG1)
BLAS1_TESTING(copy_strided_batched, ARG1)
BLAS1_TESTING(dot, ARG1)
BLAS1_TESTING(dotc, ARG1)
BLAS1_TESTING(dot_batched, ARG1)
BLAS1_TESTING(dotc_batched, ARG1)
BLAS1_TESTING(dot_strided_batched, ARG1)
BLAS1_TESTING(dotc_strided_batched, ARG1)
BLAS1_TESTING(scal, ARG2)
BLAS1_TESTING(scal_batched, ARG2)
BLAS1_TESTING(scal_strided_batched, ARG2)
BLAS1_TESTING(swap, ARG1)
BLAS1_TESTING(swap_batched, ARG1)
BLAS1_TESTING(swap_strided_batched, ARG1)
BLAS1_TESTING(rot, ARG3)
BLAS1_TESTING(rotg, ARG2)
BLAS1_TESTING(rotm, ARG1)
BLAS1_TESTING(rotmg, ARG1)

} // namespace
48 changes: 24 additions & 24 deletions clients/include/near.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,33 +41,33 @@ static constexpr double sum_error_tolerance<rocblas_double_complex> = 1 / 100000
#define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT)
#define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT)
#else
// clang-format off
#define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
NEAR_ASSERT(hCPU[i + j * lda + k * strideA], \
hGPU[i + j * lda + k * strideA], \
err); \

// NEAR_CHECK: element-by-element tolerance comparison of two flat buffers.
// For every k < batch_count, j < N, i < M it invokes
//   NEAR_ASSERT(hCPU[idx], hGPU[idx], err)   with idx = i + j * lda + k * strideA.
// NEAR_ASSERT is supplied by the caller, so the comparison used is chosen at the call site.
// The body is wrapped in do { ... } while(0) so the expansion behaves as one statement.
#define NEAR_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, err, NEAR_ASSERT) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
NEAR_ASSERT( \
hCPU[i + j * lda + k * strideA], hGPU[i + j * lda + k * strideA], err); \
} while(0)

#define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
if(rocblas_isnan(hCPU[k][i + j * lda])) { \
ASSERT_TRUE(rocblas_isnan(hGPU[k][i + j * lda])); \
} else { \
NEAR_ASSERT(hCPU[k][i + j * lda], \
hGPU[k][i + j * lda], \
err); \
} \
// NEAR_CHECK_B: like NEAR_CHECK, but hCPU and hGPU are indexed per batch as
// hCPU[k][i + j * lda] / hGPU[k][i + j * lda] instead of through a stride.
// For each element: if the hCPU value is NaN (per rocblas_isnan) the hGPU value
// must also be NaN (ASSERT_TRUE); otherwise the pair is handed to
// NEAR_ASSERT(cpu, gpu, err).
// The body is wrapped in do { ... } while(0) so the expansion behaves as one statement.
#define NEAR_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, err, NEAR_ASSERT) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
if(rocblas_isnan(hCPU[k][i + j * lda])) \
{ \
ASSERT_TRUE(rocblas_isnan(hGPU[k][i + j * lda])); \
} \
else \
{ \
NEAR_ASSERT(hCPU[k][i + j * lda], hGPU[k][i + j * lda], err); \
} \
} while(0)
// clang-format on

#endif

#define NEAR_ASSERT_HALF(a, b, err) ASSERT_NEAR(half_to_float(a), half_to_float(b), err)
Expand Down
4 changes: 0 additions & 4 deletions clients/include/rocblas_arguments.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,6 @@ struct Arguments

rocblas_initialization initialization;

#ifdef USE_TENSILE_HOST
char host_lib_path[4096];
#endif

// Validate input format.
// rocblas_gentest.py is expected to conform to this format.
// rocblas_gentest.py uses rocblas_common.yaml to generate this format.
Expand Down
6 changes: 2 additions & 4 deletions clients/include/rocblas_datatype2string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,20 +195,18 @@ constexpr rocblas_side char2rocblas_side(char value)
}
}

// clang-format off
// Translate an initialization name into the matching rocblas_initialization value.
// Recognized names: "rand_int", "trig_float", "hpl".
// Any other string yields static_cast<rocblas_initialization>(-1).
inline rocblas_initialization string2rocblas_initialization(const std::string& value)
{
    if(value == "rand_int")
        return rocblas_initialization_random_int;

    if(value == "trig_float")
        return rocblas_initialization_trig_float;

    if(value == "hpl")
        return rocblas_initialization_hpl;

    // No name matched: hand back the out-of-range sentinel.
    return static_cast<rocblas_initialization>(-1);
}

inline rocblas_datatype string2rocblas_datatype(const std::string& value)
{
// clang-format off
return
value == "f16_r" || value == "h" ? rocblas_datatype_f16_r :
value == "f32_r" || value == "s" ? rocblas_datatype_f32_r :
Expand All @@ -227,7 +225,7 @@ inline rocblas_datatype string2rocblas_datatype(const std::string& value)
value == "u8_c" ? rocblas_datatype_u8_c :
value == "u32_c" ? rocblas_datatype_u32_c :
static_cast<rocblas_datatype>(-1);
// clang-format on
}
// clang-format on

#endif
8 changes: 2 additions & 6 deletions clients/include/testing_gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,8 @@ void testing_gemm(const Arguments& arg)
double gpu_time_used, cpu_time_used;
double rocblas_gflops, cblas_gflops;
double rocblas_error = 0.0;
#ifdef USE_TENSILE_HOST
const char* host_lib_path = arg.host_lib_path;
rocblas_local_handle handle(host_lib_path);
#else
rocblas_local_handle handle;
#endif
rocblas_local_handle handle;

rocblas_int A_row = transA == rocblas_operation_none ? M : K;
rocblas_int A_col = transA == rocblas_operation_none ? K : M;
rocblas_int B_row = transB == rocblas_operation_none ? K : N;
Expand Down
35 changes: 19 additions & 16 deletions clients/include/unit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,34 +20,37 @@
#define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ)
#define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ)
#else
// clang-format off
#define UNIT_CHECK(M, N, batch_count, lda, strideA, hCPU, hGPU, UNIT_ASSERT_EQ) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
if (rocblas_isnan(hCPU[i + j * lda + k * strideA])) { \
if(rocblas_isnan(hCPU[i + j * lda + k * strideA])) \
{ \
ASSERT_TRUE(rocblas_isnan(hGPU[i + j * lda + k * strideA])); \
} else { \
} \
else \
{ \
UNIT_ASSERT_EQ(hCPU[i + j * lda + k * strideA], \
hGPU[i + j * lda + k * strideA]); \
} \
} while(0)
#define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
if (rocblas_isnan(hCPU[k][i + j * lda])) { \
ASSERT_TRUE(rocblas_isnan(hGPU[k][i + j * lda])); \
} else { \
UNIT_ASSERT_EQ(hCPU[k][i + j * lda], \
hGPU[k][i + j * lda]); \
} \
// UNIT_CHECK_B: element-by-element equality check over per-batch buffers.
// For every k < batch_count, j < N, i < M it looks at hCPU[k][i + j * lda]:
//   - if that value is NaN (per rocblas_isnan), the matching hGPU value must
//     also be NaN (ASSERT_TRUE);
//   - otherwise the pair is passed to the caller-supplied UNIT_ASSERT_EQ(cpu, gpu).
// The body is wrapped in do { ... } while(0) so the expansion behaves as one statement.
#define UNIT_CHECK_B(M, N, batch_count, lda, hCPU, hGPU, UNIT_ASSERT_EQ) \
do \
{ \
for(size_t k = 0; k < batch_count; k++) \
for(size_t j = 0; j < N; j++) \
for(size_t i = 0; i < M; i++) \
if(rocblas_isnan(hCPU[k][i + j * lda])) \
{ \
ASSERT_TRUE(rocblas_isnan(hGPU[k][i + j * lda])); \
} \
else \
{ \
UNIT_ASSERT_EQ(hCPU[k][i + j * lda], hGPU[k][i + j * lda]); \
} \
} while(0)
// clang-format on
#endif

#define ASSERT_HALF_EQ(a, b) ASSERT_FLOAT_EQ(half_to_float(a), half_to_float(b))
Expand Down
6 changes: 0 additions & 6 deletions clients/include/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ class rocblas_local_handle
{
rocblas_create_handle(&handle);
}
#ifdef USE_TENSILE_HOST
// Construct the wrapper by calling rocblas_create_host_handle(&handle, lib_path).
// The rocblas_status returned by that call is discarded here.
// NOTE(review): lib_path is presumably the Tensile host library path (the
// benchmark's --lib option) -- confirm against callers.
rocblas_local_handle(const char* lib_path)
{
rocblas_create_host_handle(&handle, lib_path);
}
#endif
~rocblas_local_handle()
{
rocblas_destroy_handle(handle);
Expand Down
4 changes: 1 addition & 3 deletions library/include/rocblas-auxiliary.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ extern "C" {
/*! \brief create handle
*/
ROCBLAS_EXPORT rocblas_status rocblas_create_handle(rocblas_handle* handle);
#ifdef USE_TENSILE_HOST
/*! \brief create handle, taking an additional const char* argument
 *  NOTE(review): the argument is presumably a host library path -- confirm and name the parameter.
 */
ROCBLAS_EXPORT rocblas_status rocblas_create_host_handle(rocblas_handle* handle, const char*);
#endif

/*! \brief destroy handle
*/
ROCBLAS_EXPORT rocblas_status rocblas_destroy_handle(rocblas_handle handle);
Expand Down
Loading