Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_bdsqr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename S>
void bdsqr_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -434,7 +435,7 @@ void bdsqr_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -451,12 +452,12 @@ void bdsqr_getPerfData(const rocblas_handle handle,
bdsqr_initData<false, true, T>(handle, uplo, n, nv, nu, nc, dD, dE, dV, ldv, dU, ldu, dC,
ldc, dInfo, hD, hE, hV, hU, hC, hInfo, D, E, false);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_bdsqr(handle, uplo, n, nv, nu, nc, dD.data(), dE.data(), dV.data(), ldv,
dU.data(), ldu, dC.data(), ldc, dInfo.data());
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down Expand Up @@ -627,7 +628,7 @@ void testing_bdsqr(Arguments& argus)
}

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
{
host_strided_batch_vector<T> hV(size_V, 1, size_V, 1);
host_strided_batch_vector<T> hU(size_U, 1, size_U, 1);
Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_bdsvdx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename U>
void bdsvdx_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -392,7 +393,7 @@ void bdsvdx_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -408,12 +409,12 @@ void bdsvdx_getPerfData(const rocblas_handle handle,
{
bdsvdx_initData<false, true, T>(handle, n, dD, dE, hD, hE);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_bdsvdx(handle, uplo, svect, srange, n, dD.data(), dE.data(), vl, vu, il, iu,
dNsv.data(), dS.data(), dZ.data(), ldz, dIfail.data(), dInfo.data());
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down Expand Up @@ -559,7 +560,7 @@ void testing_bdsvdx(Arguments& argus)
hIfailRes, hInfo, hInfoRes, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
bdsvdx_getPerfData<T>(handle, uplo, svect, srange, n, dD, dE, vl, vu, il, iu, dNsv, dS, dZ,
ldz, dIfail, dInfo, hD, hE, hNsv, hS, hZ, hInfo, &gpu_time_used,
&cpu_time_used, hot_calls, argus.profile, argus.profile_kernels,
Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_labrd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename S, typename U>
void labrd_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -291,7 +292,7 @@ void labrd_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -308,12 +309,12 @@ void labrd_getPerfData(const rocblas_handle handle,
labrd_initData<false, true, T>(handle, m, n, nb, dA, lda, dD, dE, dTauq, dTaup, dX, ldx, dY,
ldy, hA, hD, hE, hTauq, hTaup, hX, hY);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_labrd(handle, m, n, nb, dA.data(), lda, dD.data(), dE.data(), dTauq.data(),
dTaup.data(), dX.data(), ldx, dY.data(), ldy);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down Expand Up @@ -432,7 +433,7 @@ void testing_labrd(Arguments& argus)
hARes, hD, hE, hTauq, hTaup, hX, hXRes, hY, hYRes, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
labrd_getPerfData<T>(handle, m, n, nb, dA, lda, dD, dE, dTauq, dTaup, dX, ldx, dY, ldy, hA,
hD, hE, hTauq, hTaup, hX, hY, &gpu_time_used, &cpu_time_used,
hot_calls, argus.profile, argus.profile_kernels, argus.perf);
Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_lacgv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename I>
void lacgv_checkBadArgs(const rocblas_handle handle, const I n, T dA, const I inc)
Expand Down Expand Up @@ -149,7 +150,7 @@ void lacgv_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -165,11 +166,11 @@ void lacgv_getPerfData(const rocblas_handle handle,
{
lacgv_initData<false, true, T>(handle, n, dA, inc, hA);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_lacgv(handle, n, dA.data(), inc);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T, typename I>
Expand Down Expand Up @@ -240,7 +241,7 @@ void testing_lacgv(Arguments& argus)
lacgv_getError<T>(handle, n, dA, inc, hA, hAr, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
lacgv_getPerfData<T>(handle, n, dA, inc, hA, &gpu_time_used, &cpu_time_used, hot_calls,
argus.profile, argus.profile_kernels, argus.perf);

Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_larf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename I>
void larf_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -223,7 +224,7 @@ void larf_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -239,11 +240,11 @@ void larf_getPerfData(const rocblas_handle handle,
{
larf_initData<false, true, T>(handle, side, m, n, dx, inc, dt, dA, lda, xx, hx, ht, hA);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_larf(handle, side, m, n, dx.data(), inc, dt.data(), dA.data(), lda);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T, typename I>
Expand Down Expand Up @@ -343,7 +344,7 @@ void testing_larf(Arguments& argus)
larf_getError<T>(handle, side, m, n, dx, inc, dt, dA, lda, xx, hx, ht, hA, hAr, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
larf_getPerfData<T>(handle, side, m, n, dx, inc, dt, dA, lda, xx, hx, ht, hA,
&gpu_time_used, &cpu_time_used, hot_calls, argus.profile,
argus.profile_kernels, argus.perf);
Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_larfb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T>
void larfb_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -351,7 +352,7 @@ void larfb_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -368,12 +369,12 @@ void larfb_getPerfData(const rocblas_handle handle,
larfb_initData<false, true, T>(handle, side, trans, direct, storev, m, n, k, dV, ldv, dT,
ldt, dA, lda, hV, hT, hA, hW, sizeW);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_larfb(handle, side, trans, direct, storev, m, n, k, dV.data(), ldv, dT.data(),
ldt, dA.data(), lda);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down Expand Up @@ -488,7 +489,7 @@ void testing_larfb(Arguments& argus)
hV, hT, hA, hAr, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
larfb_getPerfData<T>(handle, side, trans, direct, storev, m, n, k, dV, ldv, dT, ldt, dA,
lda, hV, hT, hA, &gpu_time_used, &cpu_time_used, hot_calls,
argus.profile, argus.profile_kernels, argus.perf);
Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_larfg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename I>
void larfg_checkBadArgs(const rocblas_handle handle, const I n, T da, T dx, const I inc, T dt)
Expand Down Expand Up @@ -174,7 +175,7 @@ void larfg_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -190,11 +191,11 @@ void larfg_getPerfData(const rocblas_handle handle,
{
larfg_initData<false, true, T>(handle, n, da, dx, inc, dt, ha, hx, ht);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_larfg(handle, n, da.data(), dx.data(), inc, dt.data());
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T, typename I>
Expand Down Expand Up @@ -277,7 +278,7 @@ void testing_larfg(Arguments& argus)
larfg_getError<T>(handle, n, da, dx, inc, dt, ha, hx, hxr, ht, &max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
larfg_getPerfData<T>(handle, n, da, dx, inc, dt, ha, hx, ht, &gpu_time_used, &cpu_time_used,
hot_calls, argus.profile, argus.profile_kernels, argus.perf);

Expand Down
11 changes: 6 additions & 5 deletions projects/rocsolver/clients/common/auxiliary/testing_larft.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T>
void larft_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -258,7 +259,7 @@ void larft_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -275,11 +276,11 @@ void larft_getPerfData(const rocblas_handle handle,
larft_initData<false, true, T>(handle, direct, storev, n, k, dV, ldv, dt, dT, ldt, hV, ht,
hT, hw, size_w);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_larft(handle, direct, storev, n, k, dV.data(), ldv, dt.data(), dT.data(), ldt);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down Expand Up @@ -372,7 +373,7 @@ void testing_larft(Arguments& argus)
&max_error);

// collect performance data
if(argus.timing)
if(argus.timing && hot_calls > 0)
larft_getPerfData<T>(handle, direct, storev, n, k, dV, ldv, dt, dT, ldt, hV, ht, hT,
&gpu_time_used, &cpu_time_used, hot_calls, argus.profile,
argus.profile_kernels, argus.perf);
Expand Down
projects/rocsolver/clients/common/auxiliary/testing_lasr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "common/misc/rocsolver.hpp"
#include "common/misc/rocsolver_arguments.hpp"
#include "common/misc/rocsolver_test.hpp"
#include "common/misc/rocsolver_timer.hpp"

template <typename T, typename S>
void lasr_checkBadArgs(const rocblas_handle handle,
Expand Down Expand Up @@ -240,7 +241,7 @@ void lasr_getPerfData(const rocblas_handle handle,
// gpu-lapack performance
hipStream_t stream;
CHECK_ROCBLAS_ERROR(rocblas_get_stream(handle, &stream));
double start;
rocsolver_timer timer;

if(profile > 0)
{
Expand All @@ -256,11 +257,11 @@ void lasr_getPerfData(const rocblas_handle handle,
{
lasr_initData<false, true, T>(handle, side, pivot, direct, m, n, dC, dS, dA, lda, hC, hS, hA);

start = get_time_us_sync(stream);
timer.start(stream);
rocsolver_lasr(handle, side, pivot, direct, m, n, dC.data(), dS.data(), dA.data(), lda);
*gpu_time_used += get_time_us_sync(stream) - start;
timer.end(stream);
}
*gpu_time_used /= hot_calls;
*gpu_time_used = timer.get_combined();
}

template <typename T>
Expand Down
Loading