2 files changed: +11 -3 lines changed
@@ -227,6 +227,10 @@ typedef float2 dfloat2;
227227#define RDNA2
228228#endif
229229
// Added by this change: define RDNA1 when the compiler reports a __gfx1010__ or __gfx1012__ target.
230+ #if defined(__gfx1010__) || defined(__gfx1012__)
231+ #define RDNA1
232+ #endif
233+
230234#ifndef __has_builtin
231235 #define __has_builtin(x) 0
232236#endif
Original file line number Diff line number Diff line change @@ -61,12 +61,16 @@ static constexpr __device__ int get_mmq_x_max_device() {
6161}
6262
// get_mmq_y_host: constexpr helper that returns 64 or 128 for the compute capability `cc`.
//   Removed line ("-"): 128 when int8_mma_available(cc) or cc >= CC_VOLTA, otherwise 64.
//   Added line ("+"):   for cc >= CC_OFFSET_AMD, 64 when cc == CC_RDNA1 and 128 otherwise;
//                       for every other cc, 128 when cc >= CC_VOLTA and 64 otherwise.
//   NOTE(review): the added line no longer consults int8_mma_available(cc); presumably that
//   predicate already implies cc >= CC_VOLTA on the non-AMD path — confirm against its definition.
6363static constexpr int get_mmq_y_host (const int cc) {
64- return int8_mma_available (cc) || cc >= CC_VOLTA ? 128 : 64 ;
64+ return cc >= CC_OFFSET_AMD ? (cc == CC_RDNA1 ? 64 : 128 ) : ( cc >= CC_VOLTA ? 128 : 64 ) ;
6565}
6666
6767static constexpr __device__ int get_mmq_y_device () {
6868#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
69+ #if defined(RDNA1)
70+ return 64 ;
71+ #else
6972 return 128 ;
73+ #endif // defined RDNA1
7074#else
7175#if __CUDA_ARCH__ >= CC_VOLTA
7276 return 128 ;
@@ -2400,9 +2404,9 @@ static __device__ void mul_mat_q_process_tile(
24002404
24012405template <ggml_type type, int mmq_x, int nwarps, bool need_check>
24022406#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
2403- #if defined(RDNA3) || defined(RDNA2)
2407+ #if defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
24042408 __launch_bounds__ (WARP_SIZE*nwarps, 2 )
2405- #endif // defined(RDNA3) || defined(RDNA2)
2409+ #endif // defined(RDNA3) || defined(RDNA2) || defined(RDNA1)
24062410#else
24072411#if __CUDA_ARCH__ >= CC_VOLTA
24082412 __launch_bounds__ (WARP_SIZE*nwarps, 1 )
You can’t perform that action at this time.
0 commit comments