Add support for new gfx1200 and gfx1201 targets #12372
Changes from 6 commits: 2d7a1f9, f2872aa, 42840e9, d768080, f763866, f18ad77, 6b46213
```diff
@@ -1216,7 +1216,8 @@ static void ggml_cuda_op_mul_mat_cublas(
         CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(id), stream));

-        if (GGML_CUDA_CC_IS_CDNA(cc)) {
+        const int compute_capability = ggml_cuda_info().devices[ctx.device].cc;
+        if (GGML_CUDA_CC_IS_CDNA(compute_capability) || GGML_CUDA_CC_IS_RDNA4(compute_capability)) {
             const float alpha = 1.0f;
             const float beta = 0.0f;
             CUBLAS_CHECK(
```
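In `ggml_cuda_op_mul_mat_cublas`, the device's compute capability is read once into a local and the GEMM branch that uses float `alpha`/`beta` (FP32 accumulation), previously taken only on CDNA, is now also taken on RDNA4, i.e. the gfx1200/gfx1201 targets this PR adds. The following is a minimal, host-only sketch of the widened predicate; the enum and helper names are illustrative stand-ins, not the real `GGML_CUDA_CC_IS_CDNA` / `GGML_CUDA_CC_IS_RDNA4` macros, which operate on packed compute-capability values in ggml's CUDA/HIP headers.

```cpp
// Illustrative stand-ins only -- not the real ggml macros or compute-capability encoding.
#include <cstdio>

enum class amd_family { CDNA, RDNA2, RDNA3, RDNA4, OTHER };

static bool is_cdna (amd_family f) { return f == amd_family::CDNA;  }
static bool is_rdna4(amd_family f) { return f == amd_family::RDNA4; } // gfx1200 / gfx1201

// Shape of the widened condition: the FP32 GEMM path is taken on CDNA or RDNA4.
static bool use_fp32_gemm(amd_family f) {
    return is_cdna(f) || is_rdna4(f);
}

int main() {
    std::printf("CDNA  -> fp32 gemm: %d\n", use_fp32_gemm(amd_family::CDNA));
    std::printf("RDNA3 -> fp32 gemm: %d\n", use_fp32_gemm(amd_family::RDNA3));
    std::printf("RDNA4 -> fp32 gemm: %d\n", use_fp32_gemm(amd_family::RDNA4));
    return 0;
}
```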
```diff
@@ -1759,7 +1760,8 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
         beta = &beta_f32;
     }

-    if (GGML_CUDA_CC_IS_CDNA(ggml_cuda_info().devices[ctx.device].cc)) {
+    const int compute_capability = ggml_cuda_info().devices[ctx.device].cc;
+    if (GGML_CUDA_CC_IS_CDNA(compute_capability) || GGML_CUDA_CC_IS_RDNA4(compute_capability)) {
         cu_compute_type = CUBLAS_COMPUTE_32F;
         alpha = &alpha_f32;
         beta = &beta_f32;
```
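The batched path gets the same treatment: the compute capability is cached in a local and the condition that forces `CUBLAS_COMPUTE_32F` with float `alpha`/`beta` now also matches RDNA4. Note that the scalars have to change type together with the compute type, since the `cublasGemm*Ex` family reads `alpha`/`beta` in the format implied by the compute type. A minimal host-only sketch of that pairing, using stand-in enums rather than the real cuBLAS/ggml types:

```cpp
// Stand-in types (not cuBLAS): shows why alpha/beta switch together with the compute type.
#include <cstdint>
#include <cstdio>

enum class compute_type { F16, F32 };

struct gemm_scalars {
    compute_type type;
    const void * alpha;
    const void * beta;
};

static gemm_scalars pick_scalars(bool is_cdna_or_rdna4) {
    static const float    alpha_f32 = 1.0f,   beta_f32 = 0.0f;
    static const uint16_t alpha_f16 = 0x3C00, beta_f16 = 0x0000; // 1.0 / 0.0 as IEEE half bit patterns

    if (is_cdna_or_rdna4) {
        // CDNA and RDNA4 devices take the FP32 accumulation path.
        return { compute_type::F32, &alpha_f32, &beta_f32 };
    }
    return { compute_type::F16, &alpha_f16, &beta_f16 };
}

int main() {
    const gemm_scalars s = pick_scalars(/*is_cdna_or_rdna4=*/true);
    std::printf("FP32 compute selected: %d\n", s.type == compute_type::F32);
    return 0;
}
```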
```diff
@@ -1836,7 +1838,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co
     }
 #endif

-    if (dst->op_params[0] == GGML_PREC_DEFAULT) {
+    if (dst->op_params[0] == GGML_PREC_DEFAULT && cu_data_type == CUDA_R_16F) {
         const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_F16);
         to_fp32_cuda(dst_f16.get(), dst_ddf, ne_dst, main_stream);
     }
```
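The last hunk guards the post-GEMM conversion: with the change above, `GGML_PREC_DEFAULT` no longer implies that the batched GEMM produced FP16 output, so the FP16-to-FP32 conversion is only run when `cu_data_type` is actually `CUDA_R_16F`; when the GEMM already wrote FP32 results, no conversion is needed. Below is a hedged, host-only sketch of that control flow; `finalize_dst` and `convert_f16_to_f32` are hypothetical names used for illustration, not ggml functions.

```cpp
// Hypothetical helpers for illustration only; the point is the guard:
// convert only when the GEMM output buffer actually holds FP16 data.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class out_type { F16, F32 };

static void convert_f16_to_f32(const uint16_t * /*src*/, float * /*dst*/, size_t /*n*/) {
    std::puts("converting FP16 GEMM output to FP32");
}

// Mirrors: if (dst->op_params[0] == GGML_PREC_DEFAULT && cu_data_type == CUDA_R_16F)
static void finalize_dst(bool default_prec, out_type gemm_out_type,
                         const void * gemm_out, float * dst_f32, size_t n) {
    if (default_prec && gemm_out_type == out_type::F16) {
        convert_f16_to_f32(static_cast<const uint16_t *>(gemm_out), dst_f32, n);
    }
    // Otherwise the GEMM wrote FP32 directly into dst; no conversion needed.
}

int main() {
    std::vector<uint16_t> half_out(4, 0);
    std::vector<float>    dst(4, 0.0f);
    finalize_dst(true, out_type::F16, half_out.data(), dst.data(), dst.size()); // converts
    finalize_dst(true, out_type::F32, dst.data(),      dst.data(), dst.size()); // no-op
    return 0;
}
```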