Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,82 @@ void syevd_heevd_eig7_initData(const rocblas_handle handle,
CHECK_HIP_ERROR(dA.transfer_from(hA));
}
}
template <bool CPU, bool GPU, typename T>
void syevd_heevd_eig7_initData_gpu(const rocblas_handle handle,
const rocblas_evect evect,
const rocblas_int n,
device_strided_batch_vector<T>& dA,
const rocblas_int lda,
const rocblas_int bc,
host_strided_batch_vector<T>& hA,
std::vector<T>& A,
bool test = true)
{
if(bc > 1)
{
syevd_heevd_eig7_initData<CPU, GPU>(handle, evect, n, dA, lda, bc, hA, A, test);
}

else
{
using S = decltype(std::real(T{}));

if(CPU)
{
// generates spectrum
S eps = std::numeric_limits<S>::epsilon();
for(auto i = 0; i < n - 1; ++i)
hA[0][i + i * lda] = (i + 1) * eps;
hA[0][n - 1 + (n - 1) * lda] = 1;
CHECK_HIP_ERROR(dA.transfer_from(hA));

// generates orthogonal matrix
rocblas_int n2 = n * n;
host_strided_batch_vector<T> hQ(n2, 1, n2, 1);
device_strided_batch_vector<T> dQ(n2, 1, n2, 1);
device_strided_batch_vector<T> dipiv(n, 1, n, 1);
rocblas_init<T>(hQ, true);
CHECK_HIP_ERROR(dQ.transfer_from(hQ));
rocsolver_geqr2_geqrf(false, true, handle, n, n, dQ.data(), n, n2, dipiv.data(), n, 1);

// generates matrix with given spectrum
rocsolver_ormxr_unmxr(true, handle, rocblas_side_left, rocblas_operation_transpose, n,
n, n, dQ.data(), n, dipiv.data(), dA.data(), lda);
rocsolver_ormxr_unmxr(true, handle, rocblas_side_right, rocblas_operation_none, n, n, n,
dQ.data(), n, dipiv.data(), dA.data(), lda);
CHECK_HIP_ERROR(hA.transfer_from(dA));

// make copy of original data to test vectors if required
if(test && evect == rocblas_evect_original)
{
for(rocblas_int i = 0; i < n; i++)
{
for(rocblas_int j = 0; j < n; j++)
A[i + j * lda] = hA[0][i + j * lda];
}
}
}

if(GPU)
{
// now copy to the GPU
CHECK_HIP_ERROR(dA.transfer_from(hA));
}
}
}
// Overload of syevd_heevd_eig7_initData_gpu for device_batch_vector / host_batch_vector
// containers. No device-side generation is attempted for these container types: every
// argument is forwarded unchanged to syevd_heevd_eig7_initData<CPU, GPU>, so the behavior
// is whatever that routine does for the given evect, n, lda, bc and test values.
template <bool CPU, bool GPU, typename T>
void syevd_heevd_eig7_initData_gpu(const rocblas_handle handle,
const rocblas_evect evect,
const rocblas_int n,
device_batch_vector<T>& dA,
const rocblas_int lda,
const rocblas_int bc,
host_batch_vector<T>& hA,
std::vector<T>& A,
bool test = true)
{
// plain delegation to the reference initializer
syevd_heevd_eig7_initData<CPU, GPU>(handle, evect, n, dA, lda, bc, hA, A, test);
}

// Creates an `n` by `n` tridiagonal, Wilkinson matrix, which is formed as follows:
//
Expand Down Expand Up @@ -482,9 +558,13 @@ void syevd_heevd_initData(const rocblas_handle handle,
std::vector<T>& A,
bool test = true)
{
#define USE_GPU true
if((std::getenv("TEST_EIG7") != nullptr) || (std::getenv("SYEVD_TEST_EIG7") != nullptr))
{
syevd_heevd_eig7_initData<CPU, GPU>(handle, evect, n, dA, lda, bc, hA, A, test);
if(USE_GPU)
syevd_heevd_eig7_initData_gpu<CPU, GPU>(handle, evect, n, dA, lda, bc, hA, A, test);
else
syevd_heevd_eig7_initData<CPU, GPU>(handle, evect, n, dA, lda, bc, hA, A, test);
Comment on lines +561 to +567
}
else if((std::getenv("TEST_WILKINSON") != nullptr)
|| (std::getenv("SYEVD_TEST_WILKINSON") != nullptr))
Expand Down
38 changes: 18 additions & 20 deletions projects/rocsolver/library/src/auxiliary/rocauxiliary_stedc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,43 +61,41 @@ rocblas_status rocsolver_stedc_impl(rocblas_handle handle,
rocblas_int batch_count = 1;

// memory workspace sizes:
// size for lasrt stack/stedc workspace
size_t size_work_stack;
// size for temporary computations
size_t size_tempvect, size_tempgemm;
size_t size_tempvect, size_workSvec, size_workStmp;
// size for pointers to workspace (batched case)
size_t size_workArr;
// size for vector with positions of split blocks
size_t size_splits_map;
// size for vector with positions of split blocks and different indices
size_t size_workInt;
// size for temporary diagonal and z vectors.
size_t size_tmpz;
rocsolver_stedc_getMemorySize<false, T, S>(evect, n, batch_count, &size_work_stack,
&size_tempvect, &size_tempgemm, &size_tmpz,
&size_splits_map, &size_workArr);
size_t size_workSz;
rocsolver_stedc_getMemorySize<false, T, S>(evect, n, batch_count, &size_tempvect,
&size_workSvec, &size_workStmp, &size_workSz,
&size_workInt, &size_workArr);

if(rocblas_is_device_memory_size_query(handle))
return rocblas_set_optimal_device_memory_size(handle, size_work_stack, size_tempvect,
size_tempgemm, size_tmpz, size_splits_map,
return rocblas_set_optimal_device_memory_size(handle, size_tempvect, size_workSvec,
size_workStmp, size_workSz, size_workInt,
size_workArr);

// memory workspace allocation
void *work_stack, *tempvect, *tempgemm, *tmpz, *splits_map, *workArr;
rocblas_device_malloc mem(handle, size_work_stack, size_tempvect, size_tempgemm, size_tmpz,
size_splits_map, size_workArr);
void *tempvect, *workSvec, *workStmp, *workSz, *workInt, *workArr;
rocblas_device_malloc mem(handle, size_tempvect, size_workSvec, size_workStmp, size_workSz,
size_workInt, size_workArr);
if(!mem)
return rocblas_status_memory_error;

work_stack = mem[0];
tempvect = mem[1];
tempgemm = mem[2];
tmpz = mem[3];
splits_map = mem[4];
tempvect = mem[0];
workSvec = mem[1];
workStmp = mem[2];
workSz = mem[3];
workInt = mem[4];
workArr = mem[5];

// execution
return rocsolver_stedc_template<false, false, T>(
handle, evect, n, D, shiftD, strideD, E, shiftE, strideE, C, shiftC, ldc, strideC, info,
batch_count, work_stack, (S*)tempvect, (S*)tempgemm, (S*)tmpz, (rocblas_int*)splits_map,
batch_count, (S*)tempvect, workSvec, (S*)workStmp, (S*)workSz, (rocblas_int*)workInt,
(S**)workArr);
}

Expand Down
Loading
Loading