Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 86 additions & 69 deletions ggml/src/ggml-opencl/ggml-opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,8 @@ struct ggml_backend_opencl_device_context {
GPU_FAMILY gpu_family = GPU_FAMILY::UNKNOWN;
ADRENO_GPU_GEN adreno_gen = ADRENO_GPU_GEN::ADRENO_UNKNOWN;

std::regex *opfilter = nullptr; // regex of ops to not claim
std::string opfilter_str; // regex string for opfilter
size_t global_mem_size = 0;
};

Expand Down Expand Up @@ -415,8 +417,6 @@ struct ggml_backend_opencl_context {
bool has_qcom_subgroup_shuffle = false; // cl_qcom_subgroup_shuffle
bool disable_fusion;

std::regex *opfilter = nullptr; // regex of ops to not claim

bool adreno_has_large_buffer;
bool adreno_use_large_buffer;
ggml_cl_compiler_version adreno_cl_compiler_version;
Expand All @@ -428,6 +428,8 @@ struct ggml_backend_opencl_context {
size_t image2d_max_width;
size_t image2d_max_height;

cl_device_svm_capabilities svm_caps;

cl_context context;
cl_command_queue queue;

Expand Down Expand Up @@ -3731,6 +3733,68 @@ static std::vector<ggml_backend_device> ggml_opencl_probe_devices(ggml_backend_r
return found_devices;
}

static void ggml_opencl_print_backend_info(ggml_backend_opencl_device_context * dev_ctx) {
GGML_ASSERT(dev_ctx);
GGML_ASSERT(dev_ctx->backend_ctx);

auto * backend_ctx = dev_ctx->backend_ctx;

GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n",
backend_ctx->driver_version.c_str());
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n",
backend_ctx->fp16_support ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n",
backend_ctx->alignment);
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n",
backend_ctx->global_mem_size/1024/1024);
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n",
backend_ctx->max_alloc_size/1024/1024);
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n",
backend_ctx->image_max_buffer_size);
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n",
backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n",
backend_ctx->max_workgroup_size);
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
backend_ctx->svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");

// Print out configurations
#ifdef GGML_OPENCL_SOA_Q
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
#endif // GGML_OPENCL_SOA_Q

#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
if (backend_ctx->adreno_xmem_gemm_enabled) {
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM enabled (temporary weight prepack)\n");
}
#endif // GGML_OPENCL_USE_ADRENO_KERNELS

if (backend_ctx->adreno_use_large_buffer) {
if (!backend_ctx->adreno_has_large_buffer) {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
backend_ctx->adreno_use_large_buffer = false;
} else {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
}
}

if (dev_ctx->opfilter) {
// for information only, the actual regex object is created in ggml_opencl_is_device_supported
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", dev_ctx->opfilter_str.c_str());
}
}

// check if device should be accepted
static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
GGML_ASSERT(dev);
Expand Down Expand Up @@ -3799,6 +3863,13 @@ static bool ggml_opencl_is_device_supported(ggml_backend_dev_t dev) {
}

clGetDeviceInfo(dev_ctx->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &dev_ctx->global_mem_size, NULL);

const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
if (str_opfilter) {
dev_ctx->opfilter_str = str_opfilter;
dev_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
}

return true;
}

Expand Down Expand Up @@ -3850,15 +3921,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
char *driver_version = (char *)alloca(driver_version_str_size + 1);
clGetDeviceInfo(device, CL_DRIVER_VERSION, driver_version_str_size, driver_version, NULL);
driver_version[driver_version_str_size] = '\0';
GGML_LOG_INFO("ggml_opencl: OpenCL driver: %s\n", driver_version);
backend_ctx->driver_version = driver_version;

backend_ctx->adreno_cl_compiler_version = get_adreno_cl_compiler_version(driver_version);
backend_ctx->has_vector_subgroup_broadcast =
(backend_ctx->adreno_cl_compiler_version.type == E031 && backend_ctx->adreno_cl_compiler_version.major >= 47) ||
(backend_ctx->adreno_cl_compiler_version.type == DX && backend_ctx->adreno_cl_compiler_version.major >= 17);
GGML_LOG_INFO("ggml_opencl: vector subgroup broadcast support: %s\n",
backend_ctx->has_vector_subgroup_broadcast ? "true" : "false");

size_t ext_str_size;
clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size);
Expand All @@ -3867,18 +3935,12 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
ext_buffer[ext_str_size] = '\0'; // ensure it is null terminated

// check support for qcom_subgroup_shuffle
if (opencl_c_version.major == 3 && strstr(ext_buffer, "cl_khr_subgroups") != NULL) {
GGML_LOG_INFO("ggml_opencl: cl_khr_subgroups support: true\n");
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
backend_ctx->has_qcom_subgroup_shuffle = true;
}
if (strstr(ext_buffer, "cl_qcom_subgroup_shuffle") != NULL) {
backend_ctx->has_qcom_subgroup_shuffle = true;
}
GGML_LOG_INFO("ggml_opencl: cl_qcom_subgroup_shuffle support: %s\n",
backend_ctx->has_qcom_subgroup_shuffle ? "true" : "false");

// Check if ext_buffer contains cl_khr_fp16
backend_ctx->fp16_support = strstr(ext_buffer, "cl_khr_fp16") != NULL;
GGML_LOG_INFO("ggml_opencl: device FP16 support: %s\n", backend_ctx->fp16_support ? "true" : "false");

// check Adreno large buffer support
backend_ctx->adreno_has_large_buffer = strstr(ext_buffer, "cl_qcom_large_buffer") != NULL;
Expand All @@ -3887,35 +3949,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), &base_align_in_bits, NULL));
GGML_ASSERT(base_align_in_bits % 8u == 0);
backend_ctx->alignment = base_align_in_bits / 8u;
GGML_LOG_INFO("ggml_opencl: mem base addr align: %u\n", backend_ctx->alignment);

backend_ctx->global_mem_size = dev_ctx->global_mem_size;
GGML_LOG_INFO("ggml_opencl: global mem size: %zu MB\n", backend_ctx->global_mem_size/1024/1024);

clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL);
GGML_LOG_INFO("ggml_opencl: max mem alloc size: %zu MB\n", backend_ctx->max_alloc_size/1024/1024);

clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL);
GGML_LOG_INFO("ggml_opencl: device max image buffer size (pixels): %lu\n", backend_ctx->image_max_buffer_size);

clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL);
clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL);
GGML_LOG_INFO("ggml_opencl: device max image2d size: %lu x %lu\n", backend_ctx->image2d_max_width, backend_ctx->image2d_max_height);

clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL);
GGML_LOG_INFO("ggml_opencl: device max workgroup size: %lu\n", backend_ctx->max_workgroup_size);

// Check SVM.
cl_device_svm_capabilities svm_caps;
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &svm_caps, 0));
GGML_LOG_INFO("ggml_opencl: SVM coarse grain buffer support: %s\n",
svm_caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain buffer support: %s\n",
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM fine grain system support: %s\n",
svm_caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM ? "true" : "false");
GGML_LOG_INFO("ggml_opencl: SVM atomics support: %s\n",
svm_caps & CL_DEVICE_SVM_ATOMICS ? "true" : "false");
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &backend_ctx->max_alloc_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof(size_t), &backend_ctx->image_max_buffer_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &backend_ctx->image2d_max_width, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &backend_ctx->image2d_max_height, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &backend_ctx->max_workgroup_size, NULL));
CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &backend_ctx->svm_caps, 0));

if (opencl_c_version.major >= 3) {
// Assume it is not available for 3.0, since it is optional in 3.0.
Expand All @@ -3931,36 +3973,15 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {
backend_ctx->non_uniform_workgroups = true;
}

// Print out configurations
#ifdef GGML_OPENCL_SOA_Q
GGML_LOG_INFO("ggml_opencl: flattening quantized weights representation as struct of arrays (GGML_OPENCL_SOA_Q)\n");
#endif // GGML_OPENCL_SOA_Q

#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
GGML_LOG_INFO("ggml_opencl: using kernels optimized for Adreno (GGML_OPENCL_USE_ADRENO_KERNELS)\n");
#endif // GGML_OPENCL_USE_ADRENO_KERNELS

#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
// determine whether to use Adreno xmem GEMM
backend_ctx->adreno_xmem_gemm_enabled = getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr &&
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
if (getenv("GGML_OPENCL_ADRENO_XMEM_GEMM") != nullptr) {
GGML_LOG_INFO("ggml_opencl: Adreno xmem F16xF32 GEMM %s\n",
backend_ctx->adreno_xmem_gemm_enabled ?
"enabled (temporary weight prepack)" : "requested but unsupported by this driver");
}
#endif // GGML_OPENCL_USE_ADRENO_KERNELS
#endif

// determine whether to use large buffer for Adreno
backend_ctx->adreno_use_large_buffer = getenv("GGML_OPENCL_ADRENO_USE_LARGE_BUFFER") != nullptr &&
backend_ctx->gpu_family == GPU_FAMILY::ADRENO;
if (backend_ctx->adreno_use_large_buffer) {
if (!backend_ctx->adreno_has_large_buffer) {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer requested but not supported by driver, will use regular buffer\n");
backend_ctx->adreno_use_large_buffer = false;
} else {
GGML_LOG_INFO("ggml_opencl: Adreno large buffer enabled\n");
}
}

cl_int err;

Expand Down Expand Up @@ -4010,12 +4031,6 @@ static ggml_backend_opencl_context * ggml_cl_init(ggml_backend_dev_t dev) {

backend_ctx->disable_fusion = getenv("GGML_OPENCL_DISABLE_FUSION") != nullptr;

const char * str_opfilter = getenv("GGML_OPENCL_OPFILTER");
if (str_opfilter) {
backend_ctx->opfilter = new std::regex(str_opfilter, std::regex_constants::icase);
GGML_LOG_INFO("ggml_opencl: opfilter regex = \"%s\"\n", str_opfilter);
}

dev_ctx->backend_ctx = backend_ctx.release();
return dev_ctx->backend_ctx;
}
Expand Down Expand Up @@ -4825,7 +4840,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
ggml_backend_opencl_context * backend_ctx = dev_ctx->backend_ctx;

// reject ops that match the opfilter regex
if (backend_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *backend_ctx->opfilter)) {
if (dev_ctx->opfilter && std::regex_match(std::string(ggml_op_desc(op)), *dev_ctx->opfilter)) {
return false;
}

Expand Down Expand Up @@ -7823,6 +7838,8 @@ static ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, co
/* .context = */ backend_ctx,
};

ggml_backend_opencl_device_context * dev_ctx = (ggml_backend_opencl_device_context *) dev->context;
ggml_opencl_print_backend_info(dev_ctx);
return backend;

GGML_UNUSED(params);
Expand Down
Loading