Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
5a107fa
Merge pull request #690 from amcamd/master
amcamd Sep 11, 2019
5c6dce4
Changed timeout from hours to minutes (#699)
amdkila Sep 13, 2019
ad53fd9
set clang include directory, fix for centos build error
amcamd Sep 13, 2019
98c58bc
Merge pull request #700 from amcamd/fix_centos
amcamd Sep 14, 2019
bb08062
hot fix to restore loading of DGEMM replacement kernels (#701)
zaliu Sep 17, 2019
6bf3b50
SLES support (#704)
saadrahim Sep 17, 2019
5471329
BF16 replacement kernels (#705)
zaliu Sep 19, 2019
ab003e4
Restore usebeta1 logic (#707)
zaliu Sep 19, 2019
45dce72
Supporting clang10 for SLES (#708)
saadrahim Sep 19, 2019
f19f5ae
Batched syr (#727)
TorreZuk Oct 4, 2019
88d4409
more BF16 TN sizes
zaliu Oct 5, 2019
1ae458c
Refactoring
Oct 5, 2019
ab30f40
Move tensile_host.hpp from public C API to private C++ implementation
Oct 5, 2019
b40411e
Work around missing complex; fix formatting
Oct 5, 2019
c6f375c
Remove --lib option and argument; make use of legacy handle API inste…
Oct 5, 2019
676b65a
gfx908 BF16 TN 512x512x512 known issue
zaliu Oct 6, 2019
24abbf1
Merge pull request #740 from zaliu/bf16_more_sizes
zaliu Oct 7, 2019
a6d8347
Enable SLES packaging (#719)
saadrahim Oct 7, 2019
f7dc437
Refactor Ger and Gemv (#735)
mahmoodw Oct 7, 2019
d7d22db
Map values to value categories currently represented as double
Oct 7, 2019
ba0560b
Rot(m)(g) batched and strided_batched (#737)
daineAMD Oct 7, 2019
e6289b2
SWDEV 203994 (#743)
leekillough Oct 8, 2019
f394bba
merge master into develop before ROCm2.10
amcamd Oct 8, 2019
6ce0072
version for master branch release
amcamd Oct 8, 2019
65cc947
version for develop branch release
amcamd Oct 8, 2019
72c3b96
update Tensile package number
amcamd Oct 9, 2019
002c51f
Fixing SLES tests LD_LIBRARY_PATH and refactoring tests (#741)
saadrahim Oct 9, 2019
6be7cf6
Merge pull request #745 from amcamd/develop
amcamd Oct 9, 2019
1dbd838
New Winograd kernels added (#742)
amdkila Oct 9, 2019
d4b442a
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
2ded920
Merge remote-tracking branch 'ROCm/develop' into NewTensileClient
Oct 9, 2019
1037847
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
79f825f
Merge remote-tracking branch 'Bill/new_client_integration_2' into New…
Oct 9, 2019
6a824b9
Fix GEMM for half type
Oct 12, 2019
6250318
Add complex conjugate support.
Oct 16, 2019
732c952
Refactoring classes to be simpler
Oct 16, 2019
1c4f95a
Fix rocblas_half
Oct 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions library/src/blas3/Tensile/gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ namespace
hipMemcpy(&beta_h, beta, sizeof(T), hipMemcpyDeviceToHost);
}

TensileHostCall<T> hostCall;
RocblasContractionProblem<T> problem(ContractionProblemType::GEMM,
trans_a,
trans_b,
Expand All @@ -179,7 +178,7 @@ namespace
C,
ld_c);

return callTensileContraction(&problem, handle->host);
return handle->host->runContractionProblem(problem);

#else

Expand Down
3 changes: 1 addition & 2 deletions library/src/blas3/Tensile/gemm_strided_batched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ namespace
hipMemcpy(&beta_h, beta, sizeof(T), hipMemcpyDeviceToHost);
}

TensileHostCall<T> hostCall;
RocblasContractionProblem<T> problem(ContractionProblemType::GEMMStridedBatch,
trans_a,
trans_b,
Expand All @@ -217,7 +216,7 @@ namespace
stride_c,
b_c);

return callTensileContraction(&problem, handle->host);
return handle->host->runContractionProblem(problem);

#else
rocblas_status validArgs = validateArgs(handle,
Expand Down
15 changes: 8 additions & 7 deletions library/src/handle.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/* ************************************************************************
* Copyright 2016-2019 Advanced Micro Devices, Inc.
* ************************************************************************ */
#if BUILD_WITH_TENSILE
#include "Tensile.h"
#endif
#include "handle.h"
#include <cstdio>
#include <cstdlib>
Expand All @@ -10,10 +13,12 @@
******************************************************************************/
_rocblas_handle::_rocblas_handle()
{
#if BUILD_WITH_TENSILE
static int dummy = (tensileInitialize(), 0);
#ifdef USE_TENSILE_HOST
host = createTensileHost();
if(!host)
throw rocblas_status_internal_error;
static TensileHost* hostImpl = createTensileHost();
host = hostImpl;
#endif
#endif

// default device is active device
Expand Down Expand Up @@ -69,10 +74,6 @@ _rocblas_handle::~_rocblas_handle()
}
if(device_memory)
(hipFree)(device_memory);

#ifdef USE_TENSILE_HOST
delete host;
#endif
}

/*******************************************************************************
Expand Down
72 changes: 32 additions & 40 deletions library/src/include/tensile_host.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,41 @@ enum ContractionProblemType
};

template <typename T>
class RocblasContractionProblem
struct RocblasContractionProblem
{
public:
ContractionProblemType problem_type;
rocblas_operation trans_a;
rocblas_operation trans_b;
unsigned long m;
unsigned long n;
unsigned long k;
size_t m;
size_t n;
size_t k;
const T alpha;
const T* A;
const unsigned long ld_a;
unsigned long stride_a;
size_t ld_a;
size_t stride_a;
const T* B;
unsigned long ld_b;
unsigned long stride_b;
size_t ld_b;
size_t stride_b;
const T beta;
T* C;
unsigned long ld_c;
unsigned long stride_c;
unsigned long batch_size;
size_t ld_c;
size_t stride_c;
size_t batch_size;

RocblasContractionProblem(ContractionProblemType problem_type,
rocblas_operation trans_a,
rocblas_operation trans_b,
unsigned long m,
unsigned long n,
unsigned long k,
size_t m,
size_t n,
size_t k,
const T alpha,
const T* A,
unsigned long ld_a,
size_t ld_a,
const T* B,
unsigned long ld_b,
size_t ld_b,
const T beta,
T* C,
unsigned long ld_c)
size_t ld_c)
: problem_type(problem_type)
, trans_a(trans_a)
, trans_b(trans_b)
Expand All @@ -72,21 +71,21 @@ class RocblasContractionProblem
RocblasContractionProblem(ContractionProblemType problem_type,
rocblas_operation trans_a,
rocblas_operation trans_b,
unsigned long m,
unsigned long n,
unsigned long k,
size_t m,
size_t n,
size_t k,
const T alpha,
const T* A,
unsigned long ld_a,
unsigned long stride_a,
size_t ld_a,
size_t stride_a,
const T* B,
unsigned long ld_b,
unsigned long stride_b,
size_t ld_b,
size_t stride_b,
const T beta,
T* C,
unsigned long ld_c,
unsigned long stride_c,
unsigned long batch_size)
size_t ld_c,
size_t stride_c,
size_t batch_size)
: problem_type(problem_type)
, trans_a(trans_a)
, trans_b(trans_b)
Expand All @@ -109,24 +108,17 @@ class RocblasContractionProblem
}
};

class TensileHost
struct TensileHost
{
public:
virtual void initializeHost(const char*) {}
};
template <typename T>
rocblas_status runContractionProblem(const RocblasContractionProblem<T>& problem);

template <typename T>
class TensileHostCall
{
public:
rocblas_status runContractionProblem(RocblasContractionProblem<T>* problem, TensileHost* host);
protected:
TensileHost() = default; // Prevent instantiating this class except as base class
};

TensileHost* createTensileHost();

template <typename T>
rocblas_status callTensileContraction(RocblasContractionProblem<T>* problem, TensileHost* host);

#endif

#endif // __TENSILE_HOST_HPP__
13 changes: 0 additions & 13 deletions library/src/rocblas_auxiliary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
* Copyright 2016-2019 Advanced Micro Devices, Inc.
*
* ************************************************************************ */
#if BUILD_WITH_TENSILE
#include "Tensile.h"
#endif
#include "handle.h"
#include "logging.h"
#include "rocblas-auxiliary.h"
Expand Down Expand Up @@ -71,20 +68,10 @@ extern "C" rocblas_status rocblas_create_handle(rocblas_handle* handle)
// allocate on heap
try
{
#if BUILD_WITH_TENSILE
static int dummy = (tensileInitialize(), 0);
#endif
*handle = new _rocblas_handle();

if((*handle)->layer_mode & rocblas_layer_mode_log_trace)
log_trace(*handle, "rocblas_create_handle");

#ifdef USE_TENSILE_HOST
const char* lib_path = getenv("ROCBLAS_TENSILE_LIBPATH");
if(!lib_path)
lib_path = "/opt/rocm/"; // TODO: Set default path
(*handle)->host->initializeHost(lib_path);
#endif
}
catch(...)
{
Expand Down
Loading