Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ struct ggml_backend_opencl_context {
bool fp16_support;
bool has_vector_subgroup_broadcast;
bool disable_fusion;

bool adreno_has_large_buffer;
bool adreno_use_large_buffer;
ggml_cl_compiler_version adreno_cl_compiler_version;

int adreno_wave_size;
Expand Down Expand Up @@ -787,6 +790,10 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
" -cl-mad-enable -cl-unsafe-math-optimizations"
" -cl-finite-math-only -cl-fast-relaxed-math";

if (backend_ctx->adreno_use_large_buffer) {
compile_opts += " -qcom-enable-large-buffer ";
}

GGML_LOG_INFO("ggml_opencl: loading OpenCL kernels");

// add
Expand Down Expand Up @@ -3020,6 +3027,8 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
// Check if ext_buffer contains cl_khr_fp16
backend_ctx->fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n", backend_ctx->fp16_support ? "true" : "false");
// check Adreno large buffer support
backend_ctx->adreno_has_large_buffer = strstr(ext_buffer, "cl_qcom_large_buffer") != NULL;

// fp16 is required
if (!backend_ctx->fp16_support) {
Expand Down Expand Up @@ -3086,6 +3095,18 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
#endif // GGML_OPENCL_USE_ADRENO_KERNELS

// determine whether to use large buffer for Adreno
backend_ctx->adreno_use_large_buffer = getenv("GGML_OPENCL_ADRENO_USE_LARGE_BUFFER") != nullptr &&
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
if (backend_ctx->adreno_use_large_buffer) {
if (!backend_ctx->adreno_has_large_buffer) {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
backend_ctx->adreno_use_large_buffer = false;
} else {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
}
}

cl_int err;

// A local ref of cl_context for convenience
Expand Down Expand Up @@ -5660,6 +5681,11 @@ static ggml_backend_buffer_t ggml_backend_opencl_buffer_type_alloc_buffer(ggml_b

cl_int err;
cl_mem mem = clCreateBuffer(backend_ctx->context, CL_MEM_READ_WRITE, size, NULL, &err);
if (err != CL_SUCCESS && backend_ctx->adreno_use_large_buffer) {
cl_mem_properties props[] = { 0x41A6 /* CL_LARGE_BUFFER_QCOM */, 1, 0 };
mem = clCreateBufferWithProperties(backend_ctx->context, props, CL_MEM_READ_WRITE, size, NULL, &err);
}

if (err != CL_SUCCESS) {
GGML_LOG_INFO("%s: failed to allocate %.2f MiB\n", __func__, size / 1024.0 / 1024.0);
return nullptr;
Expand Down
Loading