Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
5a107fa
Merge pull request #690 from amcamd/master
amcamd Sep 11, 2019
5c6dce4
Changed timeout from hours to minutes (#699)
amdkila Sep 13, 2019
ad53fd9
set clang include directory, fix for centos build error
amcamd Sep 13, 2019
98c58bc
Merge pull request #700 from amcamd/fix_centos
amcamd Sep 14, 2019
bb08062
hot fix to restore loading of DGEMM replacement kernels (#701)
zaliu Sep 17, 2019
6bf3b50
SLES support (#704)
saadrahim Sep 17, 2019
5471329
BF16 replacement kernels (#705)
zaliu Sep 19, 2019
ab003e4
Restore usebeta1 logic (#707)
zaliu Sep 19, 2019
45dce72
Supporting clang10 for SLES (#708)
saadrahim Sep 19, 2019
f19f5ae
Batched syr (#727)
TorreZuk Oct 4, 2019
88d4409
more BF16 TN sizes
zaliu Oct 5, 2019
1ae458c
Refactoring
Oct 5, 2019
ab30f40
Move tensile_host.hpp from public C API to private C++ implementation
Oct 5, 2019
b40411e
Work around missing complex; fix formatting
Oct 5, 2019
c6f375c
Remove --lib option and argument; make use of legacy handle API inste…
Oct 5, 2019
676b65a
gfx908 BF16 TN 512x512x512 known issue
zaliu Oct 6, 2019
24abbf1
Merge pull request #740 from zaliu/bf16_more_sizes
zaliu Oct 7, 2019
a6d8347
Enable SLES packaging (#719)
saadrahim Oct 7, 2019
f7dc437
Refactor Ger and Gemv (#735)
mahmoodw Oct 7, 2019
d7d22db
Map values to value categories currently represented as double
Oct 7, 2019
ba0560b
Rot(m)(g) batched and strided_batched (#737)
daineAMD Oct 7, 2019
e6289b2
SWDEV 203994 (#743)
leekillough Oct 8, 2019
f394bba
merge master into develop before ROCm2.10
amcamd Oct 8, 2019
6ce0072
version for master branch release
amcamd Oct 8, 2019
65cc947
version for develop branch release
amcamd Oct 8, 2019
72c3b96
update Tensile package number
amcamd Oct 9, 2019
002c51f
Fixing SLES tests LD_LIBRARY_PATH and refactoring tests (#741)
saadrahim Oct 9, 2019
6be7cf6
Merge pull request #745 from amcamd/develop
amcamd Oct 9, 2019
1dbd838
New Winograd kernels added (#742)
amdkila Oct 9, 2019
d4b442a
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
2ded920
Merge remote-tracking branch 'ROCm/develop' into NewTensileClient
Oct 9, 2019
1037847
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
79f825f
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
eeed681
Changing Gemv and Ger stride type (#747)
mahmoodw Oct 9, 2019
c480415
Handle spaces and newline (#748)
leekillough Oct 11, 2019
6a824b9
Fix GEMM for half type
Oct 12, 2019
1252f1c
Tuned Shakespeare kernels (#749)
amdkila Oct 15, 2019
6250318
Add complex conjugate support.
Oct 16, 2019
732c952
Refactoring classes to be simpler
Oct 16, 2019
1c4f95a
Fix rocblas_half
Oct 16, 2019
18c12fd
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 17, 2019
d0cb9ea
Merge remote-tracking branch 'ROCm/develop' into NewTensileClient
Oct 17, 2019
51edc81
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 18, 2019
c1f93f3
Cleanup source
Oct 19, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions library/src/blas3/Tensile/gemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,24 +383,6 @@ hipError_t call_tensile(const T* alpha,
rocblas_int sizeL,
rocblas_handle handle)
{
// Currently alpha and beta can only be single values as
// tensile does not support arrays for scalars yet.
#ifndef NDEBUG
std::cout << "Solution Name: "
<< tensileGetSolutionName<T>(trans_a,
trans_b,
strideC1,
strideC2,
strideA1,
strideA2,
strideB1,
strideB2,
sizeI,
sizeJ,
sizeK,
sizeL)
<< std::endl;
#endif

// Collect alpha / beta (either from host or device).
// Tensile doesn't support arrays of scalars for now, so we must handle
Expand Down
42 changes: 19 additions & 23 deletions library/src/tensile_host.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,52 +61,48 @@ auto create_gemm_contraction_problem_strided_batched(rocblas_operation trans_a,
{
auto transposeA = trans_a != rocblas_operation_none;
auto transposeB = trans_b != rocblas_operation_none;
auto dt = tensile_datatype<T>;

auto dt = tensile_datatype<T>;
Tensile::ContractionProblem::FreeIndices free{2};
Tensile::ContractionProblem::BoundIndices bound{1};
Tensile::ContractionProblem::BatchIndices batch{1};

Tensile::ContractionProblem::FreeIndices free(2);
Tensile::ContractionProblem::BoundIndices bound(1);
Tensile::ContractionProblem::BatchIndices batch(1);

free[0].isA=true;
free[0].isA = true;
free[0].i = free[0].c = free[0].d = 0;
free[1].isA=false;
free[1].isA = false;
free[1].i = free[1].c = free[1].d = 1;

batch[0].a = batch[0].b = batch[0].c = batch[0].d = 2;

Tensile::TensorDescriptor a, b, c, d;
Tensile::TensorDescriptor a, b;

if(transposeA)
{
a = Tensile::TensorDescriptor(dt, {k, m, batchSize}, {1, ld_a, stride_a});
free[0].i = 1;
a = {dt, {k, m, batchSize}, {1, ld_a, stride_a}};
free[0].i = 1;
bound[0].a = 0;
}
else
{
a = Tensile::TensorDescriptor(dt, {m, k, batchSize}, {1, ld_a, stride_a});
free[0].i = 0;
a = {dt, {m, k, batchSize}, {1, ld_a, stride_a}};
free[0].i = 0;
bound[0].a = 1;
}

if(transposeB)
{
b = Tensile::TensorDescriptor(dt, {n, k, batchSize}, {1, ld_b, stride_b});
free[1].i = 0;
b = {dt, {n, k, batchSize}, {1, ld_b, stride_b}};
free[1].i = 0;
bound[0].b = 1;
}
else
{
b = Tensile::TensorDescriptor(dt, {k, n, batchSize}, {1, ld_b, stride_b});
free[1].i = 1;
b = {dt, {k, n, batchSize}, {1, ld_b, stride_b}};
free[1].i = 1;
bound[0].b = 0;
}

c = Tensile::TensorDescriptor(dt, {m, n, batchSize}, {1, ld_c, stride_c});
d = Tensile::TensorDescriptor(dt, {m, n, batchSize}, {1, ld_c, stride_c});

Tensile::TensorOps nop;
Tensile::TensorDescriptor c{dt, {m, n, batchSize}, {1, ld_c, stride_c}};

Tensile::TensorOps aops;
if(is_complex<T> && trans_a == rocblas_operation_conjugate_transpose)
Expand All @@ -116,8 +112,8 @@ auto create_gemm_contraction_problem_strided_batched(rocblas_operation trans_a,
if(is_complex<T> && trans_b == rocblas_operation_conjugate_transpose)
bops = {Tensile::TensorOp::Type::ComplexConjugate};

Tensile::ContractionProblem problem(a, aops, b, bops, c, nop, d, nop,
free, batch, bound, value_category(beta));
Tensile::ContractionProblem problem{
a, aops, b, bops, c, {}, c, {}, free, batch, bound, value_category(beta)};

return problem;
}
Expand All @@ -140,6 +136,7 @@ auto create_gemm_contraction_problem(rocblas_operation trans_a,
{
auto transposeA = trans_a != rocblas_operation_none;
auto transposeB = trans_b != rocblas_operation_none;
auto dt = tensile_datatype<T>;

Tensile::ContractionProblem::FreeIndices freeIndex(2);
Tensile::ContractionProblem::BoundIndex boundIndex;
Expand All @@ -150,7 +147,6 @@ auto create_gemm_contraction_problem(rocblas_operation trans_a,
freeIndex[1].i = freeIndex[1].c = freeIndex[1].d = 1;

Tensile::TensorDescriptor a, b;
auto dt = tensile_datatype<T>;

if(transposeA)
{
Expand Down