From dd34429afd75e906b2df2a5f73e7ffcb85330cd5 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 9 Jan 2026 14:06:48 +0100 Subject: [PATCH 01/37] ggml: add the ggml-remoting frontend/backend to the build system --- CMakePresets.json | 2 ++ ggml/CMakeLists.txt | 3 +++ ggml/src/CMakeLists.txt | 2 ++ ggml/src/ggml-backend-reg.cpp | 9 +++++++++ 4 files changed, 16 insertions(+) diff --git a/CMakePresets.json b/CMakePresets.json index b5afeb3c0f2..77c654089ab 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -30,6 +30,8 @@ { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, + { "name": "remoting_frontend", "hidden": true, "cacheVariables": { "GGML_REMOTING_FRONTEND": "ON" } }, + { "name": "remoting_backend", "hidden": true, "cacheVariables": { "GGML_REMOTING_BACKEND": "ON" } }, { "name": "x64-windows-llvm", "hidden": true, diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 0176ca1ce93..e3776172ab1 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -228,6 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU) option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF) option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON) option(GGML_ZDNN "ggml: use zDNN" OFF) +option(GGML_REMOTING_FRONTEND "ggml: use the API Remoting frontend" OFF) +option(GGML_REMOTING_BACKEND "ggml: use the API Remoting backend" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) @@ -320,6 +322,7 @@ set(GGML_PUBLIC_HEADERS include/ggml-opt.h include/ggml-metal.h include/ggml-rpc.h + include/ggml-remoting-frontend.h include/ggml-sycl.h include/ggml-vulkan.h include/ggml-webgpu.h diff --git 
a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 6192a870466..d2261c02a08 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -451,6 +451,8 @@ ggml_add_backend(HIP) ggml_add_backend(METAL) ggml_add_backend(MUSA) ggml_add_backend(RPC) +ggml_add_backend(RemotingFrontend) +ggml_add_backend(RemotingBackend) ggml_add_backend(SYCL) ggml_add_backend(Vulkan) ggml_add_backend(WebGPU) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 4181a714ad6..e4d2004c123 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -69,6 +69,10 @@ #include "ggml-rpc.h" #endif +#ifdef GGML_USE_REMOTINGFRONTEND +#include "ggml-remoting-frontend.h" +#endif + #ifdef GGML_USE_CANN #include "ggml-cann.h" #endif @@ -204,6 +208,10 @@ struct ggml_backend_registry { #ifdef GGML_USE_ZDNN register_backend(ggml_backend_zdnn_reg()); #endif +#ifdef GGML_USE_REMOTINGFRONTEND + register_backend(ggml_backend_remoting_frontend_reg()); +#endif + #ifdef GGML_USE_OPENCL register_backend(ggml_backend_opencl_reg()); #endif @@ -620,6 +628,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load_best("rpc", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path); + ggml_backend_load_best("remotingfrontend", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("hexagon", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path); From 93245a3d2690a62b9730402bb2b5f17b0542bf19 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 9 Jan 2026 14:07:35 +0100 Subject: [PATCH 02/37] ggml-remotingfrontend: guest-side backend for API Remoting acceleration --- ggml/include/ggml-remoting-frontend.h | 16 + ggml/src/ggml-remotingfrontend/CMakeLists.txt | 42 + .../apir_cs_ggml-rpc-front.cpp | 87 + .../ggml-backend-buffer-type.cpp | 100 ++ .../ggml-backend-buffer.cpp | 110 ++ .../ggml-backend-device.cpp | 155 ++ 
.../ggml-backend-reg.cpp | 150 ++ .../ggml-remotingfrontend/ggml-backend.cpp | 80 + .../ggml-remoting-frontend.cpp | 26 + .../src/ggml-remotingfrontend/ggml-remoting.h | 75 + .../ggmlremoting_functions.yaml | 168 ++ .../ggml-remotingfrontend/include/apir_hw.h | 9 + .../include/drm-uapi/drm.h | 1408 +++++++++++++++++ .../include/drm-uapi/virtgpu_drm.h | 276 ++++ .../ggml-remotingfrontend/include/venus_hw.h | 74 + .../regenerate_remoting.py | 278 ++++ ggml/src/ggml-remotingfrontend/virtgpu-apir.h | 17 + .../virtgpu-forward-backend.cpp | 51 + .../virtgpu-forward-buffer-type.cpp | 131 ++ .../virtgpu-forward-buffer.cpp | 166 ++ .../virtgpu-forward-device.cpp | 209 +++ .../virtgpu-forward-impl.h | 27 + .../virtgpu-forward.gen.h | 32 + .../src/ggml-remotingfrontend/virtgpu-shm.cpp | 105 ++ ggml/src/ggml-remotingfrontend/virtgpu-shm.h | 22 + .../ggml-remotingfrontend/virtgpu-utils.cpp | 186 +++ .../src/ggml-remotingfrontend/virtgpu-utils.h | 133 ++ ggml/src/ggml-remotingfrontend/virtgpu.cpp | 526 ++++++ ggml/src/ggml-remotingfrontend/virtgpu.h | 101 ++ 29 files changed, 4760 insertions(+) create mode 100644 ggml/include/ggml-remoting-frontend.h create mode 100644 ggml/src/ggml-remotingfrontend/CMakeLists.txt create mode 100644 ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-backend.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp create mode 100644 ggml/src/ggml-remotingfrontend/ggml-remoting.h create mode 100644 ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml create mode 100644 ggml/src/ggml-remotingfrontend/include/apir_hw.h create mode 100644 
# Build rules for the API Remoting frontend (guest-side ggml backend).
cmake_minimum_required(VERSION 3.19)
cmake_policy(SET CMP0114 NEW)

message(STATUS "Enable API Remoting frontend")

# libdrm is located through pkg-config; everything needed to compile and link
# against it comes from the DRM_* variables it defines.
find_package(PkgConfig REQUIRED)
pkg_check_modules(DRM REQUIRED libdrm)

ggml_add_backend_library(ggml-remotingfrontend
                         ggml-backend-buffer.cpp
                         ggml-backend.cpp
                         ggml-backend-device.cpp
                         ggml-backend-reg.cpp
                         ggml-backend-buffer-type.cpp
                         virtgpu-apir.h
                         virtgpu-forward.gen.h
                         virtgpu.cpp
                         virtgpu-shm.cpp
                         virtgpu-utils.cpp
                         virtgpu-forward-device.cpp
                         virtgpu-forward-buffer-type.cpp
                         virtgpu-forward-buffer.cpp
                         virtgpu-forward-backend.cpp
                         virtgpu-forward-impl.h
                         apir_cs_ggml-rpc-front.cpp
                         ../../include/ggml-remoting-frontend.h
                        )

# Debug: show what pkg-config found
message(STATUS "DRM_INCLUDE_DIRS: ${DRM_INCLUDE_DIRS}")
message(STATUS "DRM_LINK_LIBRARIES: ${DRM_LINK_LIBRARIES}")
message(STATUS "DRM_CFLAGS_OTHER: ${DRM_CFLAGS_OTHER}")

# Use the absolute library paths reported by pkg-config so that a libdrm
# installed outside the default linker search path is still found.
# No hard-coded /usr/include/libdrm: DRM_INCLUDE_DIRS is the source of truth.
target_link_libraries(ggml-remotingfrontend PUBLIC ${DRM_LINK_LIBRARIES})
target_include_directories(ggml-remotingfrontend PUBLIC ${DRM_INCLUDE_DIRS})
target_compile_options(ggml-remotingfrontend PUBLIC ${DRM_CFLAGS_OTHER})

target_include_directories(ggml-remotingfrontend PUBLIC ./include)
target_include_directories(ggml-remotingfrontend PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

# Request C++20 through a compile feature instead of a raw compiler flag, so
# CMake picks the right switch for the active toolchain.
target_compile_features(ggml-remotingfrontend PRIVATE cxx_std_20)
(tensor->buffer) { + ggml_backend_buffer_t buffer = tensor->buffer; + + result.buffer = BUFFER_TO_HOST_HANDLE(buffer); + } else { + result.buffer = 0; + } + for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) { + result.ne[i] = tensor->ne[i]; + result.nb[i] = tensor->nb[i]; + } + result.op = tensor->op; + for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) { + result.op_params[i] = tensor->op_params[i]; + } + result.flags = tensor->flags; + for (uint32_t i = 0; i < GGML_MAX_SRC; i++) { + result.src[i] = reinterpret_cast(tensor->src[i]); + } + result.view_src = reinterpret_cast(tensor->view_src); + result.view_offs = tensor->view_offs; + result.data = reinterpret_cast(tensor->data); + if (tensor->data) { + if (!tensor->buffer) { + FATAL("tensor has data but not buffer :/"); + } + // tensor->data is serialized as an offset to the buffer base address + result.data -= reinterpret_cast(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base); + } + snprintf(result.name, GGML_MAX_NAME, "%s", tensor->name); + return result; +} + +void +apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, std::unordered_set & visited) { + if (tensor == nullptr) { + return; + } + if (visited.find(tensor) != visited.end()) { + return; + } + visited.insert(tensor); + for (int i = 0; i < GGML_MAX_SRC; i++) { + apir_add_tensor(tensor->src[i], tensors, visited); + } + apir_add_tensor(tensor->view_src, tensors, visited); + tensors.push_back(apir_serialize_tensor(tensor)); +} + +void +apir_serialize_graph(const ggml_cgraph * cgraph, std::vector & output) { + uint32_t n_nodes = cgraph->n_nodes; + std::vector tensors; + std::unordered_set visited; + for (uint32_t i = 0; i < n_nodes; i++) { + apir_add_tensor(cgraph->nodes[i], tensors, visited); + } + // serialization format: + // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) | + uint32_t n_tensors = tensors.size(); + int output_size = sizeof(uint32_t) + n_nodes * 
sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor); + output.resize(output_size, 0); + memcpy(output.data(), &n_nodes, sizeof(n_nodes)); + for (uint32_t i = 0; i < n_nodes; i++) { + memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t)); + } + uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t)); + *out_ntensors = n_tensors; + apir_rpc_tensor * out_tensors = (apir_rpc_tensor *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t)); + memcpy(out_tensors, tensors.data(), n_tensors * sizeof(apir_rpc_tensor)); +} diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp new file mode 100644 index 00000000000..7af60209f9c --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp @@ -0,0 +1,100 @@ +#include "ggml-remoting.h" + +static ggml_backend_buffer_t +ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + if (!context) { + FATAL("Couldn't allocate the buffer context ..."); + } + + context->gpu = gpu; + + bool async__unused, host_buffer__unused, events__unused; + bool buffer_from_host_ptr; + apir_device_get_props(gpu, + &async__unused, + &host_buffer__unused, + &buffer_from_host_ptr, + &events__unused + ); + + if (buffer_from_host_ptr) { + context->apir_context = apir_device_buffer_from_ptr(gpu, size, size); + context->base = context->apir_context.shmem.mmap_ptr; + context->is_from_ptr = true; + } else { + context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size); + context->is_from_ptr = false; + context->base = NULL; + } + + ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft, 
ggml_backend_remoting_buffer_interface, (void *) context, size); + + return buffer; +} + +static const char * +ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + return apir_buffer_type_get_name(gpu, buft); +} + +static size_t +ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + static size_t align = 0; + + if (align == 0) { + align = apir_buffer_type_get_alignment(gpu, buft); + } + + return align; +} + +static size_t +ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + static size_t max_size = 0; + if (max_size == 0) { + max_size = apir_buffer_type_get_max_size(gpu, buft); + } + + return max_size; +} + +static bool +ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + return apir_buffer_type_is_host(gpu, buft); +} + +static size_t +ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { + struct virtgpu *gpu = BUFT_TO_GPU(buft); + + return apir_buffer_type_get_alloc_size(gpu, buft, tensor); +} + +const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface = { + /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_remoting_buffer_type_alloc_buffer, + /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, + /* .is_host = */ NULL, +}; + +const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface = { + /* .get_name = */ ggml_backend_remoting_buffer_type_get_name, + /* .alloc_buffer = */ NULL, + /* .get_alignment = */ ggml_backend_remoting_buffer_type_get_alignment, + 
/* .get_max_size = */ ggml_backend_remoting_buffer_type_get_max_size, + /* .get_alloc_size = */ ggml_backend_remoting_buffer_type_get_alloc_size, + /* .is_host = */ NULL, +}; diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp new file mode 100644 index 00000000000..87c34d4a188 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp @@ -0,0 +1,110 @@ +#include "ggml-remoting.h" + +#define BUFFER_TO_GPU(name) \ + ((struct ggml_backend_remoting_buffer_context *) (name)->context)->gpu + +struct timer_data get_tensor_timer = {0, 0, 0, "get_tensor"}; +struct timer_data set_tensor_timer = {0, 0, 0, "set_tensor"}; +struct timer_data cpy_tensor_timer = {0, 0, 0, "cpy_tensor"}; + +struct timer_data get_tensor_from_ptr_timer = {0, 0, 0, "get_tensor_from_ptr"}; +struct timer_data set_tensor_from_ptr_timer = {0, 0, 0, "set_tensor_from_ptr"}; + +static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) { + struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) buffer->context; + if (context->base) { + return context->base; + } + + context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), + BUFFER_TO_APIR_CONTEXT(buffer)); + + return context->base; +} + +static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { + struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + + struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); + if (context->is_from_ptr) { + memcpy((char *)tensor->data + offset, data, size); + } else { + apir_buffer_set_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size); + } + + return; +} + +static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { + struct virtgpu *gpu = 
BUFFER_TO_GPU(buffer); + struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); + if (context->is_from_ptr) { + memcpy(data, (const char *)tensor->data + offset, size); + } else { + apir_buffer_get_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size); + } +} + +static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { + UNUSED(buffer); + + memcpy((char *)tensor->data + offset, data, size); + + return; +} + +static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { + UNUSED(buffer); + + memcpy(data, (const char *)tensor->data + offset, size); +} + +static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { + struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + + bool ret = apir_buffer_cpy_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst); + + return ret; +} + +static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { + struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + + apir_buffer_clear(gpu, BUFFER_TO_APIR_CONTEXT(buffer), value); + + return; +} + +static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) { + struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + + apir_buffer_free_buffer(gpu, BUFFER_TO_APIR_CONTEXT(buffer)); + + struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); + free(context); + buffer->context = NULL; +} + +const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface = { + /* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer, + /* .get_base = */ ggml_backend_remoting_buffer_get_base, + /* .init_tensor = */ NULL, + /* .memset_tensor = */ NULL, + /* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor, + /* .get_tensor = */ 
ggml_backend_remoting_buffer_get_tensor, + /* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor, + /* .clear = */ ggml_backend_remoting_buffer_clear, + /* .reset = */ NULL, +}; + +const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface = { + /* .free_buffer = */ ggml_backend_remoting_buffer_free_buffer, + /* .get_base = */ ggml_backend_remoting_buffer_get_base, + /* .init_tensor = */ NULL, + /* .memset_tensor = */ NULL, + /* .set_tensor = */ ggml_backend_remoting_buffer_set_tensor_from_ptr, + /* .get_tensor = */ ggml_backend_remoting_buffer_get_tensor_from_ptr, + /* .cpy_tensor = */ ggml_backend_remoting_buffer_cpy_tensor, + /* .clear = */ ggml_backend_remoting_buffer_clear, + /* .reset = */ NULL, +}; diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp new file mode 100644 index 00000000000..5be945b558c --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp @@ -0,0 +1,155 @@ +#include "ggml-remoting.h" + +static const char * +ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { + struct virtgpu *gpu = DEV_TO_GPU(dev); + + return apir_device_get_name(gpu); +} + +static const char * +ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { + struct virtgpu *gpu = DEV_TO_GPU(dev); + + return apir_device_get_description(gpu); +} + +static enum ggml_backend_dev_type +ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { + struct virtgpu *gpu = DEV_TO_GPU(dev); + + static enum ggml_backend_dev_type type; + static bool has_type = false; + if (!has_type) { + has_type = true; + type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); + } + + return type; +} + +static void +ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + struct virtgpu *gpu = DEV_TO_GPU(dev); + + return apir_device_get_memory(gpu, free, total); +} + +static bool 
+ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { +#if USE_ALWAYS_TRUE_SUPPORTS_OP == 1 + /* ggml-rpc cheats it like this */ + /* with the current implementation of serialize_tensor, the src/view aren't properly passed */ + UNUSED(dev); + UNUSED(op); + + return true; +#else + struct virtgpu *gpu = DEV_TO_GPU(dev); + + return apir_device_supports_op(gpu, op); +#endif +} + +static bool +ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + bool supported = buft->device == dev; + + return supported; +} + +static bool +ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { + UNUSED(dev); + UNUSED(op); + + return false; +} + +static void +ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { + props->name = ggml_backend_remoting_device_get_name(dev); + props->description = ggml_backend_remoting_device_get_description(dev); + props->type = ggml_backend_remoting_device_get_type(dev); + ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total); + + struct virtgpu *gpu = DEV_TO_GPU(dev); + apir_device_get_props(gpu, + &props->caps.async, + &props->caps.host_buffer, + &props->caps.buffer_from_host_ptr, + &props->caps.events + ); + + props->caps.buffer_from_host_ptr = false; + props->caps.async = false; + props->caps.events = false; +} + +ggml_backend_buffer_type_t +ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { + struct virtgpu *gpu = DEV_TO_GPU(dev); + + apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + + static struct ggml_backend_buffer_type buft { + /* .iface = */ ggml_backend_remoting_buffer_type_interface, + /* .device = */ dev, + /* .context = */ (void *) ctx, + }; + + return &buft; +} + +static ggml_backend_buffer_type_t +ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { + struct virtgpu 
*gpu = DEV_TO_GPU(dev); + + apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); + + static struct ggml_backend_buffer_type buft { + /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, + /* .device = */ dev, + /* .context = */ (void *) ctx, + }; + + return &buft; +} + +static ggml_backend_buffer_t +ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { + + struct virtgpu *gpu = DEV_TO_GPU(dev); + + struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + if (!context) { + FATAL("Couldn't allocate the buffer context ..."); + } + + context->gpu = gpu; + context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size); + context->base = ptr; + context->is_from_ptr = true; + + ggml_backend_buffer_t buffer = ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev), ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size); + + return buffer; +} + +const struct ggml_backend_device_i ggml_backend_remoting_device_interface = { + /* .get_name = */ ggml_backend_remoting_device_get_name, + /* .get_description = */ ggml_backend_remoting_device_get_description, + /* .get_memory = */ ggml_backend_remoting_device_get_memory, + /* .get_type = */ ggml_backend_remoting_device_get_type, + /* .get_props = */ ggml_backend_remoting_device_get_props, + /* .init_backend = */ ggml_backend_remoting_device_init, + /* .get_buffer_type = */ ggml_backend_remoting_device_get_buffer_type, + /* .get_host_buffer_type = */ NULL, + /* .buffer_from_host_ptr = */ ggml_backend_remoting_device_buffer_from_ptr, + /* .supports_op = */ ggml_backend_remoting_device_supports_op, + /* .supports_buft = */ ggml_backend_remoting_device_supports_buft, + /* .offload_op = */ ggml_backend_remoting_device_offload_op, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* 
.event_synchronize = */ NULL, +}; diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp new file mode 100644 index 00000000000..31527eac6cf --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -0,0 +1,150 @@ +#include +#include + +#include "ggml-remoting.h" + +static struct virtgpu *apir_initialize() { + static struct virtgpu *apir_gpu_instance = NULL; + static bool apir_initialized = false; + + if (apir_initialized) { + return apir_gpu_instance; + } + + apir_gpu_instance = create_virtgpu(); + if (!apir_gpu_instance) { + FATAL("failed to initialize the virtgpu :/"); + } + + apir_initialized = true; + + return apir_gpu_instance; +} + +static int ggml_backend_remoting_get_device_count() { + struct virtgpu *gpu = apir_initialize(); + if (!gpu) { + WARNING("apir_initialize failed :/"); + return 0; + } + + return apir_device_get_count(gpu); +} + +static size_t ggml_backend_remoting_reg_get_device_count(ggml_backend_reg_t reg) { + UNUSED(reg); + + return ggml_backend_remoting_get_device_count(); +} + +static std::vector devices; + +ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) { + GGML_ASSERT(device < devices.size()); + return devices[device]; +} + +static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { + if (devices.size() > 0) { + INFO("%s: already initialized", __func__); + return; + } + + struct virtgpu *gpu = apir_initialize(); + if (!gpu) { + FATAL("apir_initialize failed :/"); + return; + } + + static bool initialized = false; + + { + static std::mutex mutex; + std::lock_guard lock(mutex); + if (!initialized) { + + for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) { + ggml_backend_remoting_device_context *ctx = new ggml_backend_remoting_device_context; + char desc[256] = "API Remoting device"; + + ctx->device = i; + ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i); + ctx->description = desc; + ctx->gpu = gpu; 
+ + ggml_backend_dev_t dev = new ggml_backend_device { + /* .iface = */ ggml_backend_remoting_device_interface, + /* .reg = */ reg, + /* .context = */ ctx, + }; + devices.push_back(dev); + } + initialized = true; + } + } +} + +static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_t reg, size_t device) { + UNUSED(reg); + + return ggml_backend_remoting_get_device(device); +} + +static const char *ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) { + UNUSED(reg); + + return GGML_REMOTING_FRONTEND_NAME; +} + +static const struct ggml_backend_reg_i ggml_backend_remoting_reg_i = { + /* .get_name = */ ggml_backend_remoting_reg_get_name, + /* .get_device_count = */ ggml_backend_remoting_reg_get_device_count, + /* .get_device = */ ggml_backend_remoting_reg_get_device, + /* .get_proc_address = */ NULL, +}; + + +static void showTime() { + show_timer(&graph_compute_timer); + show_timer(&get_tensor_timer); + show_timer(&set_tensor_timer); + show_timer(&wait_host_reply_timer); + + if (get_tensor_from_ptr_timer.count) { + show_timer(&get_tensor_from_ptr_timer); + show_timer(&set_tensor_from_ptr_timer); + } + + if (cpy_tensor_timer.count) { + show_timer(&cpy_tensor_timer); + } +} + +ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { + struct virtgpu *gpu = apir_initialize(); + if (!gpu) { + FATAL("apir_initialize failed :/"); + return NULL; + } + + static ggml_backend_reg reg = { + /* .api_version = */ GGML_BACKEND_API_VERSION, + /* .iface = */ ggml_backend_remoting_reg_i, + /* .context = */ gpu, + }; + + static bool initialized = false; + if (initialized) { + return ® + } + initialized = true; + + ggml_backend_remoting_reg_init_devices(®); + + int cr = atexit(showTime); + GGML_ASSERT(cr == 0); + + MESSAGE("%s: initialzed", __func__); + + return ® +} diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp new file mode 100644 index 00000000000..6c6495ac909 --- /dev/null +++ 
b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp @@ -0,0 +1,80 @@ +#include "ggml-remoting.h" + +static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) { + UNUSED(backend); + + return "API Remoting backend"; +} + +static void ggml_backend_remoting_free(ggml_backend_t backend) { + delete backend; +} + +struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"}; + +static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { + struct virtgpu *gpu = DEV_TO_GPU(backend->device); + + start_timer(&graph_compute_timer); + + ggml_status status = apir_backend_graph_compute(gpu, cgraph); + + stop_timer(&graph_compute_timer); + + return status; +} + +static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) { + struct virtgpu *gpu = DEV_TO_GPU(backend->device); +#if true + UNUSED(gpu); + UNUSED(cgraph); + + // not working yet +#else + start_timer(&graph_compute_timer); + + apir_backend_graph_optimize(gpu, cgraph); + + stop_timer(&graph_compute_timer); +#endif +} + +static ggml_backend_i ggml_backend_remoting_interface = { + /* .get_name = */ ggml_backend_remoting_get_name, + /* .free = */ ggml_backend_remoting_free, + /* .set_tensor_async = */ NULL, // ggml_backend_remoting_set_tensor_async, + /* .get_tensor_async = */ NULL, // ggml_backend_remoting_get_tensor_async, + /* .cpy_tensor_async = */ NULL, // ggml_backend_remoting_cpy_tensor_async, + /* .synchronize = */ NULL, // ggml_backend_remoting_synchronize, + /* .graph_plan_create = */ NULL, + /* .graph_plan_free = */ NULL, + /* .graph_plan_update = */ NULL, + /* .graph_plan_compute = */ NULL, + /* .graph_compute = */ ggml_backend_remoting_graph_compute, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .graph_optimize = */ ggml_backend_remoting_graph_optimize, +}; + +static ggml_guid_t ggml_backend_remoting_guid() { + static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x14, 0x03, 0x86, 0x02, 0x91, 0xc8, 
0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b }; + + return &guid; +} + + +ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params) { + UNUSED(params); + + ggml_backend_remoting_device_context * ctx = (ggml_backend_remoting_device_context *)dev->context; + + ggml_backend_t remoting_backend = new ggml_backend { + /* .guid = */ ggml_backend_remoting_guid(), + /* .interface = */ ggml_backend_remoting_interface, + /* .device = */ ggml_backend_reg_dev_get(ggml_backend_remoting_frontend_reg(), ctx->device), + /* .context = */ ctx, + }; + + return remoting_backend; +} diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp b/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp new file mode 100644 index 00000000000..87679fe59a8 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp @@ -0,0 +1,26 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ggml-remoting-frontend.h" +#include "remoting.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" + + + +int ggml_backend_remoting_get_device_count(); + + + + +struct remoting_device_struct { + std::mutex mutex; +}; diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-remotingfrontend/ggml-remoting.h new file mode 100644 index 00000000000..9c5f14360d3 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + +#include "ggml-remoting-frontend.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +#include "virtgpu.h" + +// USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes + +#define USE_ALWAYS_TRUE_SUPPORTS_OP 1 +#define USE_METAL_GUEST_SUPPORTS_OP 0 + +#define DEV_TO_GPU(name) \ + ((struct ggml_backend_remoting_device_context *) (name)->context)->gpu + +#define BUFFER_TO_GGML_CONTEXT(name) \ + ((struct ggml_backend_remoting_buffer_context *) (name)->context) 
+// Address of the apir_context embedded in a buffer's context; parenthesized so the result composes with -> and unary operators.
+#define BUFFER_TO_APIR_CONTEXT(name) \
+    (&((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context)
+// Host-side handle of a buffer: the host_handle field of its apir_context.
+#define BUFFER_TO_HOST_HANDLE(name) \
+    (((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle)
+// Context of remoting device 0; parenthesized so GET_DEVICE_CONTEXT()->field applies to the cast result.
+#define GET_DEVICE_CONTEXT() \
+    ((struct ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context)
+// virtgpu handle of the device that a buffer type belongs to.
+#define BUFT_TO_GPU(name) \
+    (((struct ggml_backend_remoting_device_context *) (name)->device->context)->gpu)
+// Per-device state; reached through the device's `context` pointer (see DEV_TO_GPU).
+struct ggml_backend_remoting_device_context {
+    size_t device; // device index, passed to ggml_backend_reg_dev_get() by ggml_backend_remoting_device_init()
+    std::string name;
+    std::string description;
+
+    std::vector> shared_memory;
+
+    struct virtgpu *gpu; // handle returned by DEV_TO_GPU / BUFT_TO_GPU
+};
+// Per-buffer state; reached through the buffer's `context` pointer (see BUFFER_TO_GGML_CONTEXT).
+struct ggml_backend_remoting_buffer_context {
+    apir_buffer_context_t apir_context; // carries host_handle, see BUFFER_TO_HOST_HANDLE
+
+    struct virtgpu *gpu;
+
+    void *base; // NOTE(review): presumably the base address of the buffer memory -- confirm
+
+    bool is_from_ptr; // NOTE(review): presumably set for buffers created through buffer_from_ptr -- confirm
+};
+// Interface tables: declared here, defined outside this header.
+extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface;
+extern const struct ggml_backend_device_i ggml_backend_remoting_device_interface;
+extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface;
+extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface;
+extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface;
+
+ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device);
+ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params);
+ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev);
+// Host handle of a buffer type: the value stored in buft->context, cast to the handle type.
+static inline apir_buffer_type_host_handle_t
+ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) {
+    // in the backend, the buffer handle is the buffer pointer
+    return (apir_buffer_type_host_handle_t) buft->context;
+}
+// Host handle of a buffer, read from its remoting buffer context.
+static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) {
+    return BUFFER_TO_HOST_HANDLE(buffer);
+}
diff --git a/ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml
b/ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml new file mode 100644 index 00000000000..d45f0ebb178 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml @@ -0,0 +1,168 @@ +# YAML schema for GGML remoting API functions +# This defines the structure for generating the remoting layer code + +# Configuration for the generated files +config: + # Base path for the generated files + base_path: "ggml/src" + + # Header files to update + files: + apir_backend_header: "ggml-remotingbackend/shared/apir_backend.gen.h" + backend_dispatched_header: "ggml-remotingbackend/backend-dispatched.gen.h" + virtgpu_forward_header: "ggml-remotingfrontend/virtgpu-forward.gen.h" + +# Simplified function definitions with grouping and metadata combined +functions: + device: + group_description: "device" + functions: + get_device_count: + # No specific metadata - uses default void return and base params + + get_count: + frontend_return: "int" + + get_name: + frontend_return: "const char *" + + get_description: + frontend_return: "const char *" + + get_type: + frontend_return: "uint32_t" + + get_memory: + frontend_return: "void" + frontend_extra_params: + - "size_t *free" + - "size_t *total" + + supports_op: + frontend_return: "bool" + frontend_extra_params: + - "const ggml_tensor *op" + + get_buffer_type: + frontend_return: "apir_buffer_type_host_handle_t" + + get_props: + frontend_return: "void" + frontend_extra_params: + - "bool *async" + - "bool *host_buffer" + - "bool *buffer_from_host_ptr" + - "bool *events" + + buffer_from_ptr: + frontend_return: "apir_buffer_context_t" + frontend_extra_params: + - "size_t size" + - "size_t max_tensor_size" + + buffer_type: + group_description: "buffer-type" + functions: + get_name: + frontend_return: "const char *" + frontend_extra_params: + - "ggml_backend_buffer_type_t buft" + + get_alignment: + frontend_return: "size_t" + frontend_extra_params: + - "ggml_backend_buffer_type_t buft" + + get_max_size: + 
frontend_return: "size_t" + frontend_extra_params: + - "ggml_backend_buffer_type_t buft" + + is_host: + frontend_return: "bool" + frontend_extra_params: + - "ggml_backend_buffer_type_t buft" + + alloc_buffer: + frontend_return: "apir_buffer_context_t" + frontend_extra_params: + - "ggml_backend_buffer_type_t buffer_buft" + - "size_t size" + + get_alloc_size: + frontend_return: "size_t" + frontend_extra_params: + - "ggml_backend_buffer_type_t buft" + - "const ggml_tensor *op" + + buffer: + group_description: "buffer" + functions: + get_base: + frontend_return: "void *" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + + set_tensor: + frontend_return: "void" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + - "ggml_tensor *tensor" + - "const void *data" + - "size_t offset" + - "size_t size" + + get_tensor: + frontend_return: "void" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + - "const ggml_tensor *tensor" + - "void *data" + - "size_t offset" + - "size_t size" + + cpy_tensor: + frontend_return: "bool" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + - "const ggml_tensor *src" + - "const ggml_tensor *dst" + + clear: + frontend_return: "void" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + - "uint8_t value" + + free_buffer: + frontend_return: "void" + frontend_extra_params: + - "apir_buffer_context_t *buffer_context" + + backend: + group_description: "backend" + functions: + graph_compute: + frontend_return: "ggml_status" + frontend_extra_params: + - "ggml_cgraph *cgraph" + + graph_optimize: + frontend_return: "ggml_cgraph *" + frontend_extra_params: + - "ggml_cgraph *cgraph" + enabled: false + +# Naming patterns used for code generation +naming_patterns: + # How to generate enum names + enum_prefix: "APIR_COMMAND_TYPE_" + + # How to generate backend function names + backend_function_prefix: "backend_" + + # How to generate frontend function names + 
frontend_function_prefix: "apir_" + + # Standard frontend first parameter + frontend_base_param: "struct virtgpu *gpu" diff --git a/ggml/src/ggml-remotingfrontend/include/apir_hw.h b/ggml/src/ggml-remotingfrontend/include/apir_hw.h new file mode 100644 index 00000000000..33af045ca2b --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/include/apir_hw.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +struct virgl_renderer_capset_apir { + uint32_t apir_version; + uint32_t supports_blob_resources; + uint32_t reserved[4]; // For future expansion +}; diff --git a/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h b/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h new file mode 100644 index 00000000000..4e4f7c2c39e --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h @@ -0,0 +1,1408 @@ +/* + * Header for the Direct Rendering Manager + * + * Author: Rickard E. (Rik) Faith + * + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. + */ + +/* + * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. + * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. + * All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DRM_H_ +#define _DRM_H_ + +#if defined(__linux__) + +#include +#include +typedef unsigned int drm_handle_t; + +#else /* One of the BSDs */ + +#include +#include +#include +typedef int8_t __s8; +typedef uint8_t __u8; +typedef int16_t __s16; +typedef uint16_t __u16; +typedef int32_t __s32; +typedef uint32_t __u32; +typedef int64_t __s64; +typedef uint64_t __u64; +typedef size_t __kernel_size_t; +typedef unsigned long drm_handle_t; + +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ +#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ +#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ +#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ + +#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ +#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ +#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) +#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) +#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) + +typedef unsigned int drm_context_t; +typedef unsigned int drm_drawable_t; +typedef unsigned int drm_magic_t; + +/* + * Cliprect. + * + * \warning: If you change this structure, make sure you change + * XF86DRIClipRectRec in the server as well + * + * \note KW: Actually it's illegal to change either for + * backwards-compatibility reasons. 
+ */ +struct drm_clip_rect { + unsigned short x1; + unsigned short y1; + unsigned short x2; + unsigned short y2; +}; + +/* + * Drawable information. + */ +struct drm_drawable_info { + unsigned int num_rects; + struct drm_clip_rect *rects; +}; + +/* + * Texture region, + */ +struct drm_tex_region { + unsigned char next; + unsigned char prev; + unsigned char in_use; + unsigned char padding; + unsigned int age; +}; + +/* + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +}; + +/* + * DRM_IOCTL_VERSION ioctl argument type. + * + * \sa drmGetVersion(). + */ +struct drm_version { + int version_major; /**< Major version */ + int version_minor; /**< Minor version */ + int version_patchlevel; /**< Patch level */ + __kernel_size_t name_len; /**< Length of name buffer */ + char *name; /**< Name of driver */ + __kernel_size_t date_len; /**< Length of date buffer */ + char *date; /**< User-space buffer to hold date */ + __kernel_size_t desc_len; /**< Length of desc buffer */ + char *desc; /**< User-space buffer to hold desc */ +}; + +/* + * DRM_IOCTL_GET_UNIQUE ioctl argument type. + * + * \sa drmGetBusid() and drmSetBusId(). + */ +struct drm_unique { + __kernel_size_t unique_len; /**< Length of unique */ + char *unique; /**< Unique name for driver instantiation */ +}; + +struct drm_list { + int count; /**< Length of user-space structures */ + struct drm_version *version; +}; + +struct drm_block { + int unused; +}; + +/* + * DRM_IOCTL_CONTROL ioctl argument type. + * + * \sa drmCtlInstHandler() and drmCtlUninstHandler(). 
+ */ +struct drm_control { + enum { + DRM_ADD_COMMAND, + DRM_RM_COMMAND, + DRM_INST_HANDLER, + DRM_UNINST_HANDLER + } func; + int irq; +}; + +/* + * Type of memory to map. + */ +enum drm_map_type { + _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ + _DRM_REGISTERS = 1, /**< no caching, no core dump */ + _DRM_SHM = 2, /**< shared, cached */ + _DRM_AGP = 3, /**< AGP/GART */ + _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ + _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ +}; + +/* + * Memory mapping flags. + */ +enum drm_map_flags { + _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ + _DRM_READ_ONLY = 0x02, + _DRM_LOCKED = 0x04, /**< shared, cached, locked */ + _DRM_KERNEL = 0x08, /**< kernel requires access */ + _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ + _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ + _DRM_REMOVABLE = 0x40, /**< Removable mapping */ + _DRM_DRIVER = 0x80 /**< Managed by driver */ +}; + +struct drm_ctx_priv_map { + unsigned int ctx_id; /**< Context requesting private mapping */ + void *handle; /**< Handle of map */ +}; + +/* + * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls + * argument type. + * + * \sa drmAddMap(). + */ +struct drm_map { + unsigned long offset; /**< Requested physical address (0 for SAREA)*/ + unsigned long size; /**< Requested physical size (bytes) */ + enum drm_map_type type; /**< Type of memory to map */ + enum drm_map_flags flags; /**< Flags */ + void *handle; /**< User-space: "Handle" to pass to mmap() */ + /**< Kernel-space: kernel-virtual address */ + int mtrr; /**< MTRR slot used */ + /* Private data */ +}; + +/* + * DRM_IOCTL_GET_CLIENT ioctl argument type. + */ +struct drm_client { + int idx; /**< Which client desired? */ + int auth; /**< Is client authenticated? 
*/ + unsigned long pid; /**< Process ID */ + unsigned long uid; /**< User ID */ + unsigned long magic; /**< Magic */ + unsigned long iocs; /**< Ioctl count */ +}; + +enum drm_stat_type { + _DRM_STAT_LOCK, + _DRM_STAT_OPENS, + _DRM_STAT_CLOSES, + _DRM_STAT_IOCTLS, + _DRM_STAT_LOCKS, + _DRM_STAT_UNLOCKS, + _DRM_STAT_VALUE, /**< Generic value */ + _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ + _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ + + _DRM_STAT_IRQ, /**< IRQ */ + _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ + _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ + _DRM_STAT_DMA, /**< DMA */ + _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ + _DRM_STAT_MISSED /**< Missed DMA opportunity */ + /* Add to the *END* of the list */ +}; + +/* + * DRM_IOCTL_GET_STATS ioctl argument type. + */ +struct drm_stats { + unsigned long count; + struct { + unsigned long value; + enum drm_stat_type type; + } data[15]; +}; + +/* + * Hardware locking flags. + */ +enum drm_lock_flags { + _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ + _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ + _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ + _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ + /* These *HALT* flags aren't supported yet + -- they will be used to support the + full-screen DGA-like mode. */ + _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ + _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ +}; + +/* + * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. + * + * \sa drmGetLock() and drmUnlock(). + */ +struct drm_lock { + int context; + enum drm_lock_flags flags; +}; + +/* + * DMA flags + * + * \warning + * These values \e must match xf86drm.h. + * + * \sa drm_dma. + */ +enum drm_dma_flags { + /* Flags for DMA buffer dispatch */ + _DRM_DMA_BLOCK = 0x01, /**< + * Block until buffer dispatched. 
+ * + * \note The buffer may not yet have + * been processed by the hardware -- + * getting a hardware lock with the + * hardware quiescent will ensure + * that the buffer has been + * processed. + */ + _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ + _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ + + /* Flags for DMA buffer request */ + _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ + _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ + _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ +}; + +/* + * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. + * + * \sa drmAddBufs(). + */ +struct drm_buf_desc { + int count; /**< Number of buffers of this size */ + int size; /**< Size in bytes */ + int low_mark; /**< Low water mark */ + int high_mark; /**< High water mark */ + enum { + _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ + _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ + _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ + _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ + _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ + } flags; + unsigned long agp_start; /**< + * Start address of where the AGP buffers are + * in the AGP aperture + */ +}; + +/* + * DRM_IOCTL_INFO_BUFS ioctl argument type. + */ +struct drm_buf_info { + int count; /**< Entries in list */ + struct drm_buf_desc *list; +}; + +/* + * DRM_IOCTL_FREE_BUFS ioctl argument type. + */ +struct drm_buf_free { + int count; + int *list; +}; + +/* + * Buffer information + * + * \sa drm_buf_map. + */ +struct drm_buf_pub { + int idx; /**< Index into the master buffer list */ + int total; /**< Buffer size */ + int used; /**< Amount of buffer in use (for DMA) */ + void *address; /**< Address of buffer */ +}; + +/* + * DRM_IOCTL_MAP_BUFS ioctl argument type. 
+ */ +struct drm_buf_map { + int count; /**< Length of the buffer list */ +#ifdef __cplusplus + void *virt; +#else + void *virtual; /**< Mmap'd area in user-virtual */ +#endif + struct drm_buf_pub *list; /**< Buffer information */ +}; + +/* + * DRM_IOCTL_DMA ioctl argument type. + * + * Indices here refer to the offset into the buffer list in drm_buf_get. + * + * \sa drmDMA(). + */ +struct drm_dma { + int context; /**< Context handle */ + int send_count; /**< Number of buffers to send */ + int *send_indices; /**< List of handles to buffers */ + int *send_sizes; /**< Lengths of data to send */ + enum drm_dma_flags flags; /**< Flags */ + int request_count; /**< Number of buffers requested */ + int request_size; /**< Desired size for buffers */ + int *request_indices; /**< Buffer information */ + int *request_sizes; + int granted_count; /**< Number of buffers granted */ +}; + +enum drm_ctx_flags { + _DRM_CONTEXT_PRESERVED = 0x01, + _DRM_CONTEXT_2DONLY = 0x02 +}; + +/* + * DRM_IOCTL_ADD_CTX ioctl argument type. + * + * \sa drmCreateContext() and drmDestroyContext(). + */ +struct drm_ctx { + drm_context_t handle; + enum drm_ctx_flags flags; +}; + +/* + * DRM_IOCTL_RES_CTX ioctl argument type. + */ +struct drm_ctx_res { + int count; + struct drm_ctx *contexts; +}; + +/* + * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. + */ +struct drm_draw { + drm_drawable_t handle; +}; + +/* + * DRM_IOCTL_UPDATE_DRAW ioctl argument type. + */ +typedef enum { + DRM_DRAWABLE_CLIPRECTS +} drm_drawable_info_type_t; + +struct drm_update_draw { + drm_drawable_t handle; + unsigned int type; + unsigned int num; + unsigned long long data; +}; + +/* + * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. + */ +struct drm_auth { + drm_magic_t magic; +}; + +/* + * DRM_IOCTL_IRQ_BUSID ioctl argument type. + * + * \sa drmGetInterruptFromBusID(). 
+ */ +struct drm_irq_busid { + int irq; /**< IRQ number */ + int busnum; /**< bus number */ + int devnum; /**< device number */ + int funcnum; /**< function number */ +}; + +enum drm_vblank_seq_type { + _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ + _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ + /* bits 1-6 are reserved for high crtcs */ + _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, + _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ + _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ + _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ + _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ + _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ +}; +#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 + +#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) +#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ + _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) + +struct drm_wait_vblank_request { + enum drm_vblank_seq_type type; + unsigned int sequence; + unsigned long signal; +}; + +struct drm_wait_vblank_reply { + enum drm_vblank_seq_type type; + unsigned int sequence; + long tval_sec; + long tval_usec; +}; + +/* + * DRM_IOCTL_WAIT_VBLANK ioctl argument type. + * + * \sa drmWaitVBlank(). + */ +union drm_wait_vblank { + struct drm_wait_vblank_request request; + struct drm_wait_vblank_reply reply; +}; + +#define _DRM_PRE_MODESET 1 +#define _DRM_POST_MODESET 2 + +/* + * DRM_IOCTL_MODESET_CTL ioctl argument type + * + * \sa drmModesetCtl(). + */ +struct drm_modeset_ctl { + __u32 crtc; + __u32 cmd; +}; + +/* + * DRM_IOCTL_AGP_ENABLE ioctl argument type. + * + * \sa drmAgpEnable(). + */ +struct drm_agp_mode { + unsigned long mode; /**< AGP mode */ +}; + +/* + * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. + * + * \sa drmAgpAlloc() and drmAgpFree(). 
+ */ +struct drm_agp_buffer { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for binding / unbinding */ + unsigned long type; /**< Type of memory to allocate */ + unsigned long physical; /**< Physical used by i810 */ +}; + +/* + * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. + * + * \sa drmAgpBind() and drmAgpUnbind(). + */ +struct drm_agp_binding { + unsigned long handle; /**< From drm_agp_buffer */ + unsigned long offset; /**< In bytes -- will round to page boundary */ +}; + +/* + * DRM_IOCTL_AGP_INFO ioctl argument type. + * + * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), + * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), + * drmAgpVendorId() and drmAgpDeviceId(). + */ +struct drm_agp_info { + int agp_version_major; + int agp_version_minor; + unsigned long mode; + unsigned long aperture_base; /* physical address */ + unsigned long aperture_size; /* bytes */ + unsigned long memory_allowed; /* bytes */ + unsigned long memory_used; + + /* PCI information */ + unsigned short id_vendor; + unsigned short id_device; +}; + +/* + * DRM_IOCTL_SG_ALLOC ioctl argument type. + */ +struct drm_scatter_gather { + unsigned long size; /**< In bytes -- will round to page boundary */ + unsigned long handle; /**< Used for mapping / unmapping */ +}; + +/* + * DRM_IOCTL_SET_VERSION ioctl argument type. + */ +struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +}; + +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +struct drm_gem_close { + /** Handle of the object to be closed. 
*/ + __u32 handle; + __u32 pad; +}; + +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +struct drm_gem_flink { + /** Handle for the object being named */ + __u32 handle; + + /** Returned global name */ + __u32 name; +}; + +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +struct drm_gem_open { + /** Name of object being opened */ + __u32 name; + + /** Returned handle for the object */ + __u32 handle; + + /** Returned size of the object */ + __u64 size; +}; + +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ +#define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a :ref:`CRTC index` + * in the high bits of &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ +#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ +#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. 
+ * + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. + */ +#define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. + */ +#define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ +#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy + * page-flips. + */ +#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. + * + * Note that the cross-driver contract is to merely return a valid size; + * drivers are free to attach another meaning on top, eg. i915 returns the + * maximum plane size. 
+ */ +#define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ +#define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ +#define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ +#define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ +#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. + */ +#define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * :ref:`drm_sync_objects`. + */ +#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 +/** + * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic + * commits. + */ +#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 + +/* DRM_IOCTL_GET_CAP ioctl argument type */ +struct drm_get_cap { + __u64 capability; + __u64 value; +}; + +/** + * DRM_CLIENT_CAP_STEREO_3D + * + * If set to 1, the DRM core will expose the stereo 3D capabilities of the + * monitor by advertising the supported 3D layouts in the flags of struct + * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 3.13. 
+ */ +#define DRM_CLIENT_CAP_STEREO_3D 1 + +/** + * DRM_CLIENT_CAP_UNIVERSAL_PLANES + * + * If set to 1, the DRM core will expose all planes (overlay, primary, and + * cursor) to userspace. + * + * This capability has been introduced in kernel version 3.15. Starting from + * kernel version 3.17, this capability is always supported for all drivers. + */ +#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 + +/** + * DRM_CLIENT_CAP_ATOMIC + * + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. + * + * If the driver doesn't support atomic mode-setting, enabling this capability + * will fail with -EOPNOTSUPP. + * + * This capability has been introduced in kernel version 4.0. Starting from + * kernel version 4.2, this capability is always supported for atomic-capable + * drivers. + */ +#define DRM_CLIENT_CAP_ATOMIC 3 + +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + * See ``DRM_MODE_FLAG_PIC_AR_*``. + * + * This capability is always supported for all drivers starting from kernel + * version 4.18. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + +/** + * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS + * + * If set to 1, the DRM core will expose special connectors to be used for + * writing back to memory the scene setup in the commit. The client must enable + * &DRM_CLIENT_CAP_ATOMIC first. + * + * This capability is always supported for atomic-capable drivers starting from + * kernel version 4.19. + */ +#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 + +/** + * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT + * + * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and + * virtualbox) have additional restrictions for cursor planes (thus + * making cursor planes on those drivers not truly universal,) e.g. 
+ * they need cursor planes to act like one would expect from a mouse + * cursor and have correctly set hotspot properties. + * If this client cap is not set the DRM core will hide cursor plane on + * those virtualized drivers because not setting it implies that the + * client is not capable of dealing with those extra restictions. + * Clients which do set cursor hotspot and treat the cursor plane + * like a mouse cursor should set this property. + * The client must enable &DRM_CLIENT_CAP_ATOMIC first. + * + * Setting this property on drivers which do not special case + * cursor planes (i.e. non-virtualized drivers) will return + * EOPNOTSUPP, which can be used by userspace to gauge + * requirements of the hardware/drivers they're running on. + * + * This capability is always supported for atomic-capable virtualized + * drivers starting from kernel version 6.6. + */ +#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 + +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +struct drm_set_client_cap { + __u64 capability; + __u64 value; +}; + +#define DRM_RDWR O_RDWR +#define DRM_CLOEXEC O_CLOEXEC +struct drm_prime_handle { + __u32 handle; + + /** Flags.. 
only applicable for handle->fd */ + __u32 flags; + + /** Returned dmabuf file descriptor */ + __s32 fd; +}; + +struct drm_syncobj_create { + __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) + __u32 flags; +}; + +struct drm_syncobj_destroy { + __u32 handle; + __u32 pad; +}; + +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +struct drm_syncobj_handle { + __u32 handle; + __u32 flags; + + __s32 fd; + __u32 pad; +}; + +struct drm_syncobj_transfer { + __u32 src_handle; + __u32 dst_handle; + __u64 src_point; + __u64 dst_point; + __u32 flags; + __u32 pad; +}; + +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; + /** + * @deadline_nsec - fence deadline hint + * + * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing + * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is + * set. + */ + __u64 deadline_nsec; +}; + +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. 
+ * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to send events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + + +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + +#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ +struct drm_syncobj_timeline_array { + __u64 handles; + __u64 points; + __u32 count_handles; + __u32 flags; +}; + + +/* Query current scanout sequence number */ +struct drm_crtc_get_sequence { + __u32 crtc_id; /* requested crtc_id */ + __u32 active; /* return: crtc output is active */ + __u64 sequence; /* return: most recent vblank sequence */ + __s64 sequence_ns; /* return: most recent time of first pixel out */ +}; + +/* Queue event to be delivered at specified sequence. Time stamp marks + * when the first pixel of the refresh cycle leaves the display engine + * for the display + */ +#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ +#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ + +struct drm_crtc_queue_sequence { + __u32 crtc_id; + __u32 flags; + __u64 sequence; /* on input, target sequence. 
on output, actual sequence */ + __u64 user_data; /* user data passed to event */ +}; + +#if defined(__cplusplus) +} +#endif + +#include "drm_mode.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_IOCTL_BASE 'd' +#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) +#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) +#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) +#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) + +#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) +#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) +#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) +#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) +#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) +#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) +#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) +#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. 
+ */ +#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) +#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) +#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) +#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) + +#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) +#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) +#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) +#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) +#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) +#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) +#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) +#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) +#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) +#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) +#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) + +#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) + +#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) +#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) + +#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) +#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) + +#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) +#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) +#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) +#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) +#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) +#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) +#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) +#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) +#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) +#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) +#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) +#define DRM_IOCTL_UNLOCK 
DRM_IOW( 0x2b, struct drm_lock) +#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) + +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ +#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. 
+ */ +#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) + +#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) +#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) +#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) +#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) +#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) +#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) +#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) + +#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) +#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) + +#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) + +#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) +#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) + +#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) + +#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) +#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) +#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) +#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) +#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) +#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) +#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ +#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ + +#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) +#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct 
drm_mode_connector_set_property) +#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) +#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) +#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ +#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) +#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) +#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) + +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. 
+ */ +#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) +#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) +#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) +#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) +#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) +#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) +#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) +#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) +#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) +#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) +#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) +#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) + +#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) +#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) +#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) + +#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) +#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) +#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) +#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) + +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) 
+#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) +#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) + +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernel fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. + */ +#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) + +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. 
+ * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + +/* + * Device specific ioctls should only be in their respective headers + * The device specific ioctl range is from 0x40 to 0x9f. + * Generic IOCTLS restart at 0xA0. + * + * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and + * drmCommandReadWrite(). + */ +#define DRM_COMMAND_BASE 0x40 +#define DRM_COMMAND_END 0xA0 + +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). + * + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. + * + * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. 
+ */ +struct drm_event { + __u32 type; + __u32 length; +}; + +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ +#define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ +#define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ +#define DRM_EVENT_CRTC_SEQUENCE 0x03 + +struct drm_event_vblank { + struct drm_event base; + __u64 user_data; + __u32 tv_sec; + __u32 tv_usec; + __u32 sequence; + __u32 crtc_id; /* 0 on older kernels that do not support this */ +}; + +/* Event delivered at sequence. 
Time stamp marks when the first pixel + * of the refresh cycle leaves the display engine for the display + */ +struct drm_event_crtc_sequence { + struct drm_event base; + __u64 user_data; + __s64 time_ns; + __u64 sequence; +}; + +/* typedef area */ +typedef struct drm_clip_rect drm_clip_rect_t; +typedef struct drm_drawable_info drm_drawable_info_t; +typedef struct drm_tex_region drm_tex_region_t; +typedef struct drm_hw_lock drm_hw_lock_t; +typedef struct drm_version drm_version_t; +typedef struct drm_unique drm_unique_t; +typedef struct drm_list drm_list_t; +typedef struct drm_block drm_block_t; +typedef struct drm_control drm_control_t; +typedef enum drm_map_type drm_map_type_t; +typedef enum drm_map_flags drm_map_flags_t; +typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; +typedef struct drm_map drm_map_t; +typedef struct drm_client drm_client_t; +typedef enum drm_stat_type drm_stat_type_t; +typedef struct drm_stats drm_stats_t; +typedef enum drm_lock_flags drm_lock_flags_t; +typedef struct drm_lock drm_lock_t; +typedef enum drm_dma_flags drm_dma_flags_t; +typedef struct drm_buf_desc drm_buf_desc_t; +typedef struct drm_buf_info drm_buf_info_t; +typedef struct drm_buf_free drm_buf_free_t; +typedef struct drm_buf_pub drm_buf_pub_t; +typedef struct drm_buf_map drm_buf_map_t; +typedef struct drm_dma drm_dma_t; +typedef union drm_wait_vblank drm_wait_vblank_t; +typedef struct drm_agp_mode drm_agp_mode_t; +typedef enum drm_ctx_flags drm_ctx_flags_t; +typedef struct drm_ctx drm_ctx_t; +typedef struct drm_ctx_res drm_ctx_res_t; +typedef struct drm_draw drm_draw_t; +typedef struct drm_update_draw drm_update_draw_t; +typedef struct drm_auth drm_auth_t; +typedef struct drm_irq_busid drm_irq_busid_t; +typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; + +typedef struct drm_agp_buffer drm_agp_buffer_t; +typedef struct drm_agp_binding drm_agp_binding_t; +typedef struct drm_agp_info drm_agp_info_t; +typedef struct drm_scatter_gather drm_scatter_gather_t; +typedef struct 
drm_set_version drm_set_version_t; + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h b/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h new file mode 100644 index 00000000000..9debb320c34 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h @@ -0,0 +1,276 @@ +/* + * Copyright 2013 Red Hat + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef VIRTGPU_DRM_H +#define VIRTGPU_DRM_H + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. 
+ * + * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel + * compatibility Keep fields aligned to their size + */ + +#define DRM_VIRTGPU_MAP 0x01 +#define DRM_VIRTGPU_EXECBUFFER 0x02 +#define DRM_VIRTGPU_GETPARAM 0x03 +#define DRM_VIRTGPU_RESOURCE_CREATE 0x04 +#define DRM_VIRTGPU_RESOURCE_INFO 0x05 +#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06 +#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 +#define DRM_VIRTGPU_WAIT 0x08 +#define DRM_VIRTGPU_GET_CAPS 0x09 +#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a +#define DRM_VIRTGPU_CONTEXT_INIT 0x0b + +#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 +#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 +#define VIRTGPU_EXECBUF_RING_IDX 0x04 +#define VIRTGPU_EXECBUF_FLAGS (\ + VIRTGPU_EXECBUF_FENCE_FD_IN |\ + VIRTGPU_EXECBUF_FENCE_FD_OUT |\ + VIRTGPU_EXECBUF_RING_IDX |\ + 0) + +struct drm_virtgpu_map { + __u64 offset; /* use for mmap system call */ + __u32 handle; + __u32 pad; +}; + +#define VIRTGPU_EXECBUF_SYNCOBJ_RESET 0x01 +#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \ + VIRTGPU_EXECBUF_SYNCOBJ_RESET | \ + 0) +struct drm_virtgpu_execbuffer_syncobj { + __u32 handle; + __u32 flags; + __u64 point; +}; + +/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. 
*/ +struct drm_virtgpu_execbuffer { + __u32 flags; + __u32 size; + __u64 command; /* void* */ + __u64 bo_handles; + __u32 num_bo_handles; + __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ + __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ + __u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */ + __u32 num_in_syncobjs; + __u32 num_out_syncobjs; + __u64 in_syncobjs; + __u64 out_syncobjs; +}; + +#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ +#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ +#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ +#define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */ +#define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing */ +#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */ +#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */ +#define VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME 8 /* Ability to set debug name from userspace */ + +struct drm_virtgpu_getparam { + __u64 param; + __u64 value; +}; + +/* NO_BO flags? NO resource flag? */ +/* resource flag for y_0_top */ +struct drm_virtgpu_resource_create { + __u32 target; + __u32 format; + __u32 bind; + __u32 width; + __u32 height; + __u32 depth; + __u32 array_size; + __u32 last_level; + __u32 nr_samples; + __u32 flags; + __u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? 
*/ + __u32 res_handle; /* returned by kernel */ + __u32 size; /* validate transfer in the host */ + __u32 stride; /* validate transfer in the host */ +}; + +struct drm_virtgpu_resource_info { + __u32 bo_handle; + __u32 res_handle; + __u32 size; + __u32 blob_mem; +}; + +struct drm_virtgpu_3d_box { + __u32 x; + __u32 y; + __u32 z; + __u32 w; + __u32 h; + __u32 d; +}; + +struct drm_virtgpu_3d_transfer_to_host { + __u32 bo_handle; + struct drm_virtgpu_3d_box box; + __u32 level; + __u32 offset; + __u32 stride; + __u32 layer_stride; +}; + +struct drm_virtgpu_3d_transfer_from_host { + __u32 bo_handle; + struct drm_virtgpu_3d_box box; + __u32 level; + __u32 offset; + __u32 stride; + __u32 layer_stride; +}; + +#define VIRTGPU_WAIT_NOWAIT 1 /* like it */ +struct drm_virtgpu_3d_wait { + __u32 handle; /* 0 is an invalid handle */ + __u32 flags; +}; + +#define VIRTGPU_DRM_CAPSET_VIRGL 1 +#define VIRTGPU_DRM_CAPSET_VIRGL2 2 +#define VIRTGPU_DRM_CAPSET_GFXSTREAM_VULKAN 3 +#define VIRTGPU_DRM_CAPSET_VENUS 4 +#define VIRTGPU_DRM_CAPSET_CROSS_DOMAIN 5 +#define VIRTGPU_DRM_CAPSET_DRM 6 +struct drm_virtgpu_get_caps { + __u32 cap_set_id; + __u32 cap_set_ver; + __u64 addr; + __u32 size; + __u32 pad; +}; + +struct drm_virtgpu_resource_create_blob { +#define VIRTGPU_BLOB_MEM_GUEST 0x0001 +#define VIRTGPU_BLOB_MEM_HOST3D 0x0002 +#define VIRTGPU_BLOB_MEM_HOST3D_GUEST 0x0003 + +#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE 0x0001 +#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE 0x0002 +#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 + /* zero is invalid blob_mem */ + __u32 blob_mem; + __u32 blob_flags; + __u32 bo_handle; + __u32 res_handle; + __u64 size; + + /* + * for 3D contexts with VIRTGPU_BLOB_MEM_HOST3D_GUEST and + * VIRTGPU_BLOB_MEM_HOST3D otherwise, must be zero. 
+ */ + __u32 pad; + __u32 cmd_size; + __u64 cmd; + __u64 blob_id; +}; + +#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 +#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 +#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 +#define VIRTGPU_CONTEXT_PARAM_DEBUG_NAME 0x0004 +struct drm_virtgpu_context_set_param { + __u64 param; + __u64 value; +}; + +struct drm_virtgpu_context_init { + __u32 num_params; + __u32 pad; + + /* pointer to drm_virtgpu_context_set_param array */ + __u64 ctx_set_params; +}; + +/* + * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in + * effect. The event size is sizeof(drm_event), since there is no additional + * payload. + */ +#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 + +#define DRM_IOCTL_VIRTGPU_MAP \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) + +#define DRM_IOCTL_VIRTGPU_EXECBUFFER \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ + struct drm_virtgpu_execbuffer) + +#define DRM_IOCTL_VIRTGPU_GETPARAM \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM,\ + struct drm_virtgpu_getparam) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE, \ + struct drm_virtgpu_resource_create) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \ + struct drm_virtgpu_resource_info) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST, \ + struct drm_virtgpu_3d_transfer_from_host) + +#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST, \ + struct drm_virtgpu_3d_transfer_to_host) + +#define DRM_IOCTL_VIRTGPU_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, \ + struct drm_virtgpu_3d_wait) + +#define DRM_IOCTL_VIRTGPU_GET_CAPS \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ + struct drm_virtgpu_get_caps) + +#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB \ + 
DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ + struct drm_virtgpu_resource_create_blob) + +#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ + struct drm_virtgpu_context_init) + +#if defined(__cplusplus) +} +#endif + +#endif diff --git a/ggml/src/ggml-remotingfrontend/include/venus_hw.h b/ggml/src/ggml-remotingfrontend/include/venus_hw.h new file mode 100644 index 00000000000..3ef774b8259 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/include/venus_hw.h @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Chromium + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef VENUS_HW_H +#define VENUS_HW_H + +#include <stdint.h> + +struct virgl_renderer_capset_venus { + uint32_t wire_format_version; + uint32_t vk_xml_version; + uint32_t vk_ext_command_serialization_spec_version; + uint32_t vk_mesa_venus_protocol_spec_version; + + /* This flag indicates render server config, and will be needed until drm + * virtio-gpu blob mem gets fixed to attach_resource before resource_map. + */ + uint32_t supports_blob_id_0; + + /* Extension number N, where N is defined by the Vulkan spec, corresponds + * to bit [N / 32] & (1 << N % 32). The below mask1 covers the first 1023 + * Vulkan extensions (numbered from 1 to 1023). + * + * Bit (mask1[0] & 0x1) is used for backward compatibility purpose. When + * that bit is set, the extension mask(s) are valid. Otherwise, all the + * extensions are assumed to be supported by the renderer side protocol. + */ + uint32_t vk_extension_mask1[32]; + + /* The single-threaded renderer cannot afford potential blocking calls. It + * also leads to GPU lost if the wait depends on a following command. This + * capset allows such blocking calls to passthrough from the clients, and + * shifts the responsibilities to the client drivers. + */ + uint32_t allow_vk_wait_syncs; + + /* This flag indicates that the renderer supports multiple fencing + * timelines. The client driver is expected to associate each VkQueue with + * one of these timelines at queue creation by binding it with an unused + * ring_idx. Queues created without a ring_idx binding are associated to a + * shared legacy timeline. The special ring_idx==0 is reserved for CPU + * fences that are signaled by the renderer immediately upon consumption of + * the associated renderer submission. + */ + uint32_t supports_multiple_timelines; + + /* This flag indicates to the guest that hypervisor does not support memory + * pages injections and blob allocations must be done by guest from the + * dedicated heap (Host visible memory). 
+ */ + uint32_t use_guest_vram; +}; + +#endif /* VENUS_HW_H */ diff --git a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py new file mode 100755 index 00000000000..82fab6cad5e --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +# Generated by Claude AI + +Script to completely regenerate the GGML remoting codebase from YAML configuration. + +This script reads ggmlremoting_functions.yaml and regenerates all the header files and +implementation templates for the GGML remoting layer. + +Usage: + python regenerate_remoting.py + +The script will: +1. Read ggmlremoting_functions.yaml configuration +2. Generate updated header files +3. Generate implementation templates in dedicated files +4. Show a summary of what was generated +""" + +import yaml +from typing import Dict, List, Any, Tuple +from pathlib import Path +import os + +NL = '\n' # can't have f"{'\n'}" in f-strings + +class RemotingCodebaseGenerator: + def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"): + """Initialize the generator with the YAML configuration.""" + self.yaml_path = yaml_path + + if not Path(yaml_path).exists(): + raise FileNotFoundError(f"Configuration file {yaml_path} not found") + + with open(yaml_path, 'r') as f: + self.config = yaml.safe_load(f) + + self.functions = self.config['functions'] + self.naming_patterns = self.config['naming_patterns'] + self.config_data = self.config['config'] + + def generate_enum_name(self, group_name: str, function_name: str) -> str: + """Generate the APIR_COMMAND_TYPE enum name for a function.""" + prefix = self.naming_patterns['enum_prefix'] + return f"{prefix}{group_name.upper()}_{function_name.upper()}" + + def generate_backend_function_name(self, group_name: str, function_name: str) -> str: + """Generate the backend function name.""" + function_key = f"{group_name}_{function_name}" + overrides = 
self.naming_patterns.get('backend_function_overrides', {}) + + if function_key in overrides: + return overrides[function_key] + + prefix = self.naming_patterns['backend_function_prefix'] + return f"{prefix}{group_name}_{function_name}" + + def generate_frontend_function_name(self, group_name: str, function_name: str) -> str: + """Generate the frontend function name.""" + prefix = self.naming_patterns['frontend_function_prefix'] + return f"{prefix}{group_name}_{function_name}" + + def get_enabled_functions(self) -> List[Dict[str, Any]]: + """Get all enabled functions with their metadata.""" + functions = [] + enum_value = 0 + + for group_name, group_data in self.functions.items(): + group_description = group_data['group_description'] + + for function_name, func_metadata in group_data['functions'].items(): + # Handle case where func_metadata is None or empty (functions with only comments) + if func_metadata is None: + func_metadata = {} + + # Functions are enabled by default unless explicitly disabled + if func_metadata.get('enabled', True): + functions.append({ + 'group_name': group_name, + 'function_name': function_name, + 'enum_name': self.generate_enum_name(group_name, function_name), + 'enum_value': enum_value, + 'backend_function': self.generate_backend_function_name(group_name, function_name), + 'frontend_function': self.generate_frontend_function_name(group_name, function_name), + 'frontend_return': func_metadata.get('frontend_return', 'void'), + 'frontend_extra_params': func_metadata.get('frontend_extra_params', []), + 'group_description': group_description, + 'newly_added': func_metadata.get('newly_added', False) + }) + enum_value += 1 + + return functions + + def generate_apir_backend_header(self) -> str: + """Generate the complete apir_backend.h file.""" + functions = self.get_enabled_functions() + + # Generate the enum section + enum_lines = ["typedef enum ApirBackendCommandType {"] + current_group = None + + for func in functions: + # Add comment for 
new group + if func['group_name'] != current_group: + enum_lines.append("") + enum_lines.append(f" /* {func['group_description']} */") + current_group = func['group_name'] + + enum_lines.append(f" {func['enum_name']} = {func['enum_value']},") + + # Add the count + total_count = len(functions) + enum_lines.append(f"\n // last command_type index + 1") + enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},") + enum_lines.append("} ApirBackendCommandType;") + + # Full header template + header_content = NL.join(enum_lines) + "\n" + + return header_content + + def generate_backend_dispatched_header(self) -> str: + """Generate the complete backend-dispatched.h file.""" + functions = self.get_enabled_functions() + + # Function declarations + decl_lines = [] + current_group = None + + for func in functions: + if func['group_name'] != current_group: + decl_lines.append(f"\n/* {func['group_description']} */") + current_group = func['group_name'] + + signature = "uint32_t" + params = "struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx" + decl_lines.append(f"{signature} {func['backend_function']}({params});") + + # Switch cases + switch_lines = [] + current_group = None + + for func in functions: + if func['group_name'] != current_group: + switch_lines.append(f" /* {func['group_description']} */") + current_group = func['group_name'] + + switch_lines.append(f" case {func['enum_name']}: return \"{func['backend_function']}\";") + + # Dispatch table + table_lines = [] + current_group = None + + for func in functions: + if func['group_name'] != current_group: + table_lines.append(f"\n /* {func['group_description']} */") + table_lines.append("") + current_group = func['group_name'] + + + table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},") + total_count = len(functions) + + header_content = f'''\ +#pragma once + +{NL.join(decl_lines)} + +static inline const char 
*backend_dispatch_command_name(ApirBackendCommandType type) +{{ + switch (type) {{ +{NL.join(switch_lines)} + + default: return "unknown"; + }} +}} + +extern "C" {{ +static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = {{ + {NL.join(table_lines)} +}}; +}} +''' + return header_content + + def generate_virtgpu_forward_header(self) -> str: + """Generate the complete virtgpu-forward.gen.h file.""" + functions = self.get_enabled_functions() + + decl_lines = [] + current_group = None + + for func in functions: + if func['group_name'] != current_group: + decl_lines.append("") + decl_lines.append(f"/* {func['group_description']} */") + current_group = func['group_name'] + + # Build parameter list + params = [self.naming_patterns['frontend_base_param']] + params.extend(func['frontend_extra_params']) + param_str = ', '.join(params) + + decl_lines.append(f"{func['frontend_return']} {func['frontend_function']}({param_str});") + + header_content = f'''\ +#pragma once +{NL.join(decl_lines)} +''' + return header_content + + def regenerate_codebase(self) -> None: + """Regenerate the entire remoting codebase.""" + print("🔄 Regenerating GGML Remoting Codebase...") + print("=" * 50) + + # Detect if we're running from frontend directory + current_dir = os.getcwd() + is_frontend_dir = current_dir.endswith('ggml-remotingfrontend') + + if is_frontend_dir: + # Running from ggml/src/ggml-remotingfrontend + print("📍 Detected frontend directory execution") + backend_base = Path("../ggml-remotingbackend") + frontend_base = Path(".") + else: + # Running from project root (fallback to original behavior) + print("📍 Detected project root execution") + base_path = self.config_data.get('base_path', 'ggml/src') + backend_base = Path(base_path) / "ggml-remotingbackend" + frontend_base = Path(base_path) / "ggml-remotingfrontend" + + # Compute final file paths + apir_backend_path = backend_base / "shared" / "apir_backend.gen.h" + backend_dispatched_path = 
backend_base / "backend-dispatched.gen.h" + virtgpu_forward_path = frontend_base / "virtgpu-forward.gen.h" + + # Create output directories for each file + apir_backend_path.parent.mkdir(parents=True, exist_ok=True) + backend_dispatched_path.parent.mkdir(parents=True, exist_ok=True) + virtgpu_forward_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate header files + print("📁 Generating header files...") + + apir_backend_content = self.generate_apir_backend_header() + apir_backend_path.write_text(apir_backend_content) + print(f" ✅ {apir_backend_path.resolve()}") + + backend_dispatched_content = self.generate_backend_dispatched_header() + backend_dispatched_path.write_text(backend_dispatched_content) + print(f" ✅ {backend_dispatched_path.resolve()}") + + virtgpu_forward_content = self.generate_virtgpu_forward_header() + virtgpu_forward_path.write_text(virtgpu_forward_content) + print(f" ✅ {virtgpu_forward_path.resolve()}") + + # Generate summary + functions = self.get_enabled_functions() + total_functions = len(functions) + + print("\n📊 Generation Summary:") + print("=" * 50) + print(f" Total functions: {total_functions}") + print(f" Function groups: {len(self.functions)}") + print(f" Header files: 3") + print(f" Working directory: {current_dir}") + +def main(): + try: + generator = RemotingCodebaseGenerator() + generator.regenerate_codebase() + except Exception as e: + print(f"❌ Error: {e}") + exit(1) + +if __name__ == "__main__": + main() diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h new file mode 100644 index 00000000000..230fa19f3a6 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h @@ -0,0 +1,17 @@ +#include "ggml.h" +#include "ggml-impl.h" +#include "ggml-alloc.h" + +#include "virtgpu-shm.h" +#include "virtgpu-utils.h" + +#include "../ggml-remotingbackend/shared/apir_backend.h" + +typedef struct { + apir_buffer_host_handle_t host_handle; + + struct virtgpu_shmem shmem; + 
/**
 * Returns the current wall-clock time (CLOCK_REALTIME) in milliseconds.
 *
 * The previous implementation scaled the seconds by 1e9 and added the raw
 * nanosecond field, i.e. it returned nanoseconds despite the "_ms" name.
 *
 * CLOCK_REALTIME can jump when the system clock is adjusted; prefer
 * CLOCK_MONOTONIC when the value is only used to measure elapsed time.
 */
static long long current_time_ms() {
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return (long long) ts.tv_sec * 1000LL + (long long) ts.tv_nsec / 1000000LL;
}
a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp new file mode 100644 index 00000000000..9609e5e3149 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -0,0 +1,131 @@ +#include "virtgpu-forward-impl.h" + +const char * +apir_buffer_type_get_name(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); + + apir_encode_ggml_buffer_type(encoder, buft); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + const size_t string_size = apir_decode_array_size_unchecked(decoder); + char *string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + if (!string) { + FATAL("%s: Could not allocate the device name buffer", __func__); + } + apir_decode_char_array(decoder, string, string_size); + + + remote_call_finish(gpu, encoder, decoder); + + return string; +} + +size_t +apir_buffer_type_get_alignment(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); + + apir_encode_ggml_buffer_type(encoder, buft); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + size_t alignment; + apir_decode_size_t(decoder, &alignment); + + remote_call_finish(gpu, encoder, decoder); + + return alignment; +} + +size_t +apir_buffer_type_get_max_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); + + apir_encode_ggml_buffer_type(encoder, buft); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + size_t max_size; + apir_decode_size_t(decoder, &max_size); + 
+ remote_call_finish(gpu, encoder, decoder); + + return max_size; +} + +bool +apir_buffer_type_is_host(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); + + apir_encode_ggml_buffer_type(encoder, buft); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + bool is_host; + apir_decode_bool_t(decoder, &is_host); + + remote_call_finish(gpu, encoder, decoder); + + return is_host; +} + +apir_buffer_context_t +apir_buffer_type_alloc_buffer(struct virtgpu *gpu, ggml_backend_buffer_type_t buft, size_t size) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + apir_buffer_context_t buffer_context; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); + + apir_encode_ggml_buffer_type(encoder, buft); + + apir_encode_size_t(encoder, &size); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle); + + remote_call_finish(gpu, encoder, decoder); + + return buffer_context; +} + +size_t +apir_buffer_type_get_alloc_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft, const ggml_tensor *op) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); + + apir_encode_ggml_buffer_type(encoder, buft); + + apir_encode_ggml_tensor_inline(encoder, op); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + size_t alloc_size; + apir_decode_size_t(decoder, &alloc_size); + + remote_call_finish(gpu, encoder, decoder); + + return alloc_size; +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp new file mode 100644 index 00000000000..87b7bc897bd --- /dev/null +++ 
b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp @@ -0,0 +1,166 @@ +#include "virtgpu-forward-impl.h" + +void * +apir_buffer_get_base(struct virtgpu *gpu, apir_buffer_context_t *buffer_context) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + uintptr_t base; + apir_decode_uintptr_t(decoder, &base); + + remote_call_finish(gpu, encoder, decoder); + + return (void *) base; +} + +void +apir_buffer_set_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, + ggml_tensor *tensor, const void *data, size_t offset, size_t size) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + apir_encode_ggml_tensor(encoder, tensor); + + struct virtgpu_shmem temp_shmem; // Local storage for large buffers + struct virtgpu_shmem *shmem = &temp_shmem; + + if (size <= gpu->data_shmem.mmap_size) { + // prefer the init-time allocated page, if large enough + shmem = &gpu->data_shmem; + + } else if (virtgpu_shmem_create(gpu, size, shmem)) { + FATAL("Couldn't allocate the guest-host shared buffer :/"); + } + + memcpy(shmem->mmap_ptr, data, size); + apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); + + apir_encode_size_t(encoder, &offset); + apir_encode_size_t(encoder, &size); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + remote_call_finish(gpu, encoder, decoder); + + if (shmem != &gpu->data_shmem) { + virtgpu_shmem_destroy(gpu, shmem); + } + + return; +} + +#if false +void +apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, + const ggml_tensor *tensor, void *data, size_t offset, size_t 
size) { + UNUSED(gpu); + UNUSED(tensor); + char *buffer_base_addr = (char *) buffer_context->shmem.mmap_ptr; + + memcpy(data, buffer_base_addr+offset, size); +} +#else +void +apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, + const ggml_tensor *tensor, void *data, size_t offset, size_t size) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + apir_encode_ggml_tensor(encoder, tensor); + + struct virtgpu_shmem temp_shmem; // Local storage for large buffers + struct virtgpu_shmem *shmem = &temp_shmem; + + if (size <= gpu->data_shmem.mmap_size) { + // prefer the init-time allocated page, if large enough + shmem = &gpu->data_shmem; + + } else if (virtgpu_shmem_create(gpu, size, shmem)) { + FATAL("Couldn't allocate the guest-host shared buffer :/"); + } + + apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); + apir_encode_size_t(encoder, &offset); + apir_encode_size_t(encoder, &size); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + memcpy(data, shmem->mmap_ptr, size); + + remote_call_finish(gpu, encoder, decoder); + + if (shmem != &gpu->data_shmem) { + virtgpu_shmem_destroy(gpu, shmem); + } +} +#endif + +bool +apir_buffer_cpy_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *src, const ggml_tensor *dst) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + apir_encode_ggml_tensor(encoder, src); + apir_encode_ggml_tensor(encoder, dst); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + bool ret_val; + apir_decode_bool_t(decoder, &ret_val); + + remote_call_finish(gpu, encoder, decoder); + + return 
ret_val; +} + +void +apir_buffer_clear(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, + uint8_t value) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + apir_encode_uint8_t(encoder, &value); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + remote_call_finish(gpu, encoder, decoder); +} + + +void +apir_buffer_free_buffer(struct virtgpu *gpu, apir_buffer_context_t *buffer_context) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER); + + apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + remote_call_finish(gpu, encoder, decoder); +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp new file mode 100644 index 00000000000..1b99128d735 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -0,0 +1,209 @@ +#include "virtgpu-forward-impl.h" +#include "virtgpu-shm.h" + +int +apir_device_get_count(struct virtgpu *gpu) { + static int32_t dev_count = -1; + if (dev_count != -1) { + return dev_count; + } + + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); + REMOTE_CALL(gpu, encoder, decoder, ret); + + apir_decode_int32_t(decoder, &dev_count); + + remote_call_finish(gpu, encoder, decoder); + + return dev_count; +} + +const char * +apir_device_get_name(struct virtgpu *gpu) { + static char *string = nullptr; + if (string) { + return string; + } + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + 
REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME); + REMOTE_CALL(gpu, encoder, decoder, ret); + + const size_t string_size = apir_decode_array_size_unchecked(decoder); + string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + if (!string) { + FATAL("%s: Could not allocate the device name buffer", __func__); + } + apir_decode_char_array(decoder, string, string_size); + + remote_call_finish(gpu, encoder, decoder); + + return string; +} + +const char * +apir_device_get_description(struct virtgpu *gpu) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + const size_t string_size = apir_decode_array_size_unchecked(decoder); + char *string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + if (!string) { + FATAL("%s: Could not allocate the device description buffer", __func__); + return NULL; + } + apir_decode_char_array(decoder, string, string_size); + + remote_call_finish(gpu, encoder, decoder); + + return string; +} + +uint32_t +apir_device_get_type(struct virtgpu *gpu) { + static uint32_t dev_type = 255; + if (dev_type != 255) { + return dev_type; + } + + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + apir_decode_uint32_t(decoder, &dev_type); + + remote_call_finish(gpu, encoder, decoder); + + return dev_type; +} + +void +apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total) { + static size_t dev_free = 0; + static size_t dev_total = 0; + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY); + + REMOTE_CALL(gpu, encoder, decoder, ret); + 
+ apir_decode_size_t(decoder, &dev_free); + apir_decode_size_t(decoder, &dev_total); + + *free = dev_free; + *total = dev_total; + + remote_call_finish(gpu, encoder, decoder); + + return; +} + +bool +apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP); + + apir_encode_ggml_tensor_inline(encoder, op); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + bool supports_op; + apir_decode_bool_t(decoder, &supports_op); + + remote_call_finish(gpu, encoder, decoder); + + return supports_op; +} + +apir_buffer_type_host_handle_t +apir_device_get_buffer_type(struct virtgpu *gpu) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + apir_buffer_type_host_handle_t buft_handle; + apir_decode_apir_buffer_type_host_handle_t(decoder, &buft_handle); + + remote_call_finish(gpu, encoder, decoder); + + return buft_handle; +} + +void +apir_device_get_props(struct virtgpu *gpu, + bool *async, + bool *host_buffer, + bool *buffer_from_host_ptr, + bool *events) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS); + + REMOTE_CALL(gpu, encoder, decoder, ret); + + apir_decode_bool_t(decoder, async); + apir_decode_bool_t(decoder, host_buffer); + apir_decode_bool_t(decoder, buffer_from_host_ptr); + apir_decode_bool_t(decoder, events); + + remote_call_finish(gpu, encoder, decoder); + + return; +} + +apir_buffer_context_t +apir_device_buffer_from_ptr(struct virtgpu *gpu, + size_t size, + size_t max_tensor_size) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirForwardReturnCode ret; + + apir_buffer_context_t 
/*
 * REMOTE_CALL_PREPARE(gpu, encoder, command_type)
 *
 * Obtains the encoder of a forwarded API call: `command_type` is passed as
 * the flag of an APIR_COMMAND_TYPE_Forward command. Calls FATAL() when no
 * encoder is returned.
 */
#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__)                          \
    do {                                                                                              \
        int32_t forward_flag = (int32_t) apir_command_type__;                                         \
        encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag);    \
        if (!encoder_name) {                                                                          \
            FATAL("%s: failed to prepare the remote call encoder :/", __func__);                      \
        }                                                                                             \
    } while (0)

/*
 * REMOTE_CALL(gpu, encoder, decoder, ret)
 *
 * Kicks the prepared call and validates the outcome. Calls FATAL() when no
 * decoder is produced or when the return code is below
 * APIR_FORWARD_BASE_INDEX; on success `ret` holds the code relative to that
 * base.
 *
 * The decoder pointer is reset before the call: callers declare it without
 * an initializer, so the "no decoder" check would otherwise test an
 * indeterminate value whenever remote_call() does not write it.
 */
#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name)                                         \
    do {                                                                                                        \
        decoder_name = NULL;                                                                                    \
        ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL);     \
        if (!decoder_name) {                                                                                    \
            FATAL("%s: failed to kick the remote call :/", __func__);                                           \
        }                                                                                                       \
        if (ret_name < APIR_FORWARD_BASE_INDEX) {                                                               \
            FATAL("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), ret_name); \
        }                                                                                                       \
        ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX);                                \
    } while (0)
+void apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *tensor, void *data, size_t offset, size_t size); +bool apir_buffer_cpy_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *src, const ggml_tensor *dst); +void apir_buffer_clear(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, uint8_t value); +void apir_buffer_free_buffer(struct virtgpu *gpu, apir_buffer_context_t *buffer_context); + +/* backend */ +ggml_status apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp new file mode 100644 index 00000000000..c921fc1813a --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp @@ -0,0 +1,105 @@ +#include + +#include "virtgpu.h" +#include "virtgpu-shm.h" + +static uint32_t +virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t *res_id) +{ +#ifdef SIMULATE_BO_SIZE_FIX + blob_size = align64(blob_size, 4096); +#endif + + struct drm_virtgpu_resource_create_blob args = { + .blob_mem = blob_mem, + .blob_flags = blob_flags, + .bo_handle = 0, + .res_handle = 0, + .size = blob_size, + .pad = 0, + .cmd_size = 0, + .cmd = 0, + .blob_id = blob_id, + }; + + if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) + return 0; + + *res_id = args.res_handle; + return args.bo_handle; +} + +static void +virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle) +{ + struct drm_gem_close args = { + .handle = gem_handle, + .pad = 0, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args); + assert(!ret); +#ifdef NDEBUG + UNUSED(ret); +#endif +} + +static void * +virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size) +{ + struct drm_virtgpu_map args = { + .offset = 0, + .handle = gem_handle, + .pad = 0, + }; + + 
if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) + return NULL; + + void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, + args.offset); + if (ptr == MAP_FAILED) + return NULL; + + return ptr; +} + +void +virtgpu_shmem_destroy(struct virtgpu *gpu, + struct virtgpu_shmem *shmem) +{ + munmap(shmem->mmap_ptr, shmem->mmap_size); + virtgpu_ioctl_gem_close(gpu, shmem->gem_handle); +} + +int +virtgpu_shmem_create(struct virtgpu *gpu, size_t size, struct virtgpu_shmem *shmem) +{ + size = align64(size, 16384); + + uint32_t res_id; + uint32_t gem_handle = virtgpu_ioctl_resource_create_blob( + gpu, VIRTGPU_BLOB_MEM_HOST3D, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, + &res_id); + + if (!gem_handle) + return 1; + + void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size); + if (!ptr) { + virtgpu_ioctl_gem_close(gpu, gem_handle); + return 1; + } + + shmem->res_id = res_id; + shmem->mmap_size = size; + shmem->mmap_ptr = ptr; + shmem->gem_handle = gem_handle; + + return 0; +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h new file mode 100644 index 00000000000..bcd361217ad --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "virtgpu-utils.h" + +struct virtgpu; + +struct virtgpu_shmem { + uint32_t res_id; + size_t mmap_size; + void *mmap_ptr; + + uint32_t gem_handle; +}; + +int virtgpu_shmem_create(struct virtgpu *gpu, size_t size, struct virtgpu_shmem *shmem); +void virtgpu_shmem_destroy(struct virtgpu *gpu, struct virtgpu_shmem *shmem); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp new file mode 100644 index 00000000000..100f495add1 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp @@ -0,0 +1,186 @@ +#include "virtgpu-utils.h" +#include +#include +#include + +#define NODE_ALLOC_ALIGN 64 +#define NODE_PTR_MASK 
/*
 * A function-like macro of the same name (aliasing the Windows CRT
 * _aligned_malloc) is defined earlier in this file. Left active, the
 * preprocessor rewrites the definition below - and every call to it - to
 * the reserved identifier `_aligned_malloc`. Drop the alias so that the
 * function is defined and called under its own name.
 */
#undef os_malloc_aligned

/**
 * Allocates `size` bytes aligned on `alignment` with posix_memalign().
 *
 * `alignment` is first rounded up to a multiple of sizeof(void *), which
 * posix_memalign() requires. Returns NULL when the allocation fails. The
 * memory is released with free().
 */
static inline void *
os_malloc_aligned(size_t size, size_t alignment)
{
   void *ptr;
   alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
   if (posix_memalign(&ptr, alignment, size) != 0)
      return NULL;
   return ptr;
}
os_free_aligned(_util_sparse_array_node_data(node)); +} + +static inline uintptr_t +_util_sparse_array_node(void *data, unsigned level) +{ + assert(data != NULL); + assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0); + assert((level & NODE_PTR_MASK) == 0); + return (uintptr_t)data | level; +} + +inline uintptr_t +_util_sparse_array_node_alloc(struct util_sparse_array *arr, + unsigned level) +{ + size_t size; + if (level == 0) { + size = arr->elem_size << arr->node_size_log2; + } else { + size = sizeof(uintptr_t) << arr->node_size_log2; + } + + void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN); + memset(data, 0, size); + + return _util_sparse_array_node(data, level); +} + +static inline uintptr_t +_util_sparse_array_set_or_free_node(uintptr_t *node_ptr, + uintptr_t cmp_node, + uintptr_t node) +{ + uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node); + + if (prev_node != cmp_node) { + /* We lost the race. Free this one and return the one that was already + * allocated. + */ + os_free_aligned(_util_sparse_array_node_data(node)); + return prev_node; + } else { + return node; + } +} + +void * +util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx) +{ + const unsigned node_size_log2 = arr->node_size_log2; + uintptr_t root = p_atomic_read(&arr->root); + if (unlikely(!root)) { + unsigned root_level = 0; + uint64_t idx_iter = idx >> node_size_log2; + while (idx_iter) { + idx_iter >>= node_size_log2; + root_level++; + } + uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level); + root = _util_sparse_array_set_or_free_node(&arr->root, + NULL_NODE, new_root); + } + + while (1) { + unsigned root_level = _util_sparse_array_node_level(root); + uint64_t root_idx = idx >> (root_level * node_size_log2); + if (likely(root_idx < (1ull << node_size_log2))) + break; + + /* In this case, we have a root but its level is low enough that the + * requested index is out-of-bounds. 
+ */ + uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1); + + uintptr_t *new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root); + new_root_children[0] = root; + + /* We only add one at a time instead of the whole tree because it's + * easier to ensure correctness of both the tree building and the + * clean-up path. Because we're only adding one node we never have to + * worry about trying to free multiple things without freeing the old + * things. + */ + root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root); + } + + void *node_data = _util_sparse_array_node_data(root); + unsigned node_level = _util_sparse_array_node_level(root); + while (node_level > 0) { + uint64_t child_idx = (idx >> (node_level * node_size_log2)) & + ((1ull << node_size_log2) - 1); + + uintptr_t *children = (uintptr_t *) node_data; + uintptr_t child = p_atomic_read(&children[child_idx]); + + if (unlikely(!child)) { + child = _util_sparse_array_node_alloc(arr, node_level - 1); + child = _util_sparse_array_set_or_free_node(&children[child_idx], + NULL_NODE, child); + } + + node_data = _util_sparse_array_node_data(child); + node_level = _util_sparse_array_node_level(child); + } + + uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1); + return (void *)((char *)node_data + (elem_idx * arr->elem_size)); +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h new file mode 100644 index 00000000000..dd911a63b59 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define likely(x) __builtin_expect(!!(x), 1) + +#ifndef UNUSED +#define UNUSED(x) (void)(x) +#endif + +/** Checks is a value is a power of two. Does not handle zero. 
*/ +#define IS_POT(v) (((v) & ((v) - 1)) == 0) + +/** Checks is a value is a power of two. Zero handled. */ +#define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v)) + +/** Align a value to a power of two */ +#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1)) + +#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE) + +void thks_bye(); +void breakpoint(); + +#ifndef NDEBUG +inline void +INFO(const char *format, ...) { + fprintf(stderr, "INFO: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); +} +#else +inline void +INFO(...) {} +#endif + +inline void +MESSAGE(const char *format, ...) { + fprintf(stderr, "APIR: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); +} + +inline void +WARNING(const char *format, ...) { + fprintf(stderr, "WARNING: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); +} + +inline void +ERROR(const char *format, ...) { + fprintf(stderr, "ERROR: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); +} + +inline void +FATAL(const char *format, ...) 
{ + fprintf(stderr, "FATAL: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); + + abort(); +} + +static inline bool +util_is_power_of_two_nonzero64(uint64_t v) +{ + return IS_POT_NONZERO(v); +} + +static inline uint64_t +align64(uint64_t value, uint64_t alignment) +{ + assert(util_is_power_of_two_nonzero64(alignment)); + return ALIGN_POT(value, alignment); +} + +struct list_head +{ + struct list_head *prev; + struct list_head *next; +}; + +struct util_sparse_array { + size_t elem_size; + unsigned node_size_log2; + + uintptr_t root; +}; + +void *util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx); +void util_sparse_array_init(struct util_sparse_array *arr, + size_t elem_size, size_t node_size); + +inline void +os_time_sleep(int64_t usecs) +{ + struct timespec time; + time.tv_sec = usecs / 1000000; + time.tv_nsec = (usecs % 1000000) * 1000; + while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR); +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp new file mode 100644 index 00000000000..cc418c89306 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -0,0 +1,526 @@ +#include +#include +#include +#include + +#include + +#include "virtgpu.h" + +static virt_gpu_result_t virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev); +static virt_gpu_result_t virtgpu_open(struct virtgpu *gpu); + + +static virt_gpu_result_t virtgpu_init_capset(struct virtgpu *gpu); +static virt_gpu_result_t virtgpu_init_context(struct virtgpu *gpu); + +static int virtgpu_ioctl_context_init(struct virtgpu *gpu, + enum virgl_renderer_capset capset_id); +static int +virtgpu_ioctl_get_caps(struct virtgpu *gpu, + enum virgl_renderer_capset id, + uint32_t version, + void *capset, + size_t capset_size); +static uint64_t virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param); +static void 
virtgpu_init_renderer_info(struct virtgpu *gpu); + +struct timer_data wait_host_reply_timer = {0, 0, 0, "wait_host_reply"}; + +static void log_call_duration(long long call_duration_ns, const char *name); + +const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2*1000; // 2s +const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60*1000; // 60s + +static int +virtgpu_handshake(struct virtgpu *gpu) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); + if (!encoder) { + FATAL("%s: failed to prepare the remote call encoder :/", __func__); + return 1; + } + + /* write handshake props */ + + uint32_t guest_major = APIR_PROTOCOL_MAJOR; + uint32_t guest_minor = APIR_PROTOCOL_MINOR; + apir_encode_uint32_t(encoder, &guest_major); + apir_encode_uint32_t(encoder, &guest_minor); + + /* *** */ + + uint32_t ret_magic; + long long call_duration_ns; + ret_magic = remote_call(gpu, encoder, &decoder, APIR_HANDSHAKE_MAX_WAIT_MS, &call_duration_ns); + log_call_duration(call_duration_ns, "API Remoting handshake"); + + if (!decoder) { + FATAL("%s: failed to initiate the communication with the virglrenderer library. 
" + "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", __func__); + return 1; + } + + /* read handshake return values */ + + uint32_t host_major; + uint32_t host_minor; + + if (ret_magic != APIR_HANDSHAKE_MAGIC) { + FATAL("%s: handshake with the virglrenderer failed (code=%d | %s):/", + __func__, ret_magic, apir_backend_initialize_error(ret_magic)); + } else { + apir_decode_uint32_t(decoder, &host_major); + apir_decode_uint32_t(decoder, &host_minor); + } + + remote_call_finish(gpu, encoder, decoder); + + if (ret_magic != APIR_HANDSHAKE_MAGIC) { + return 1; + } + + INFO("%s: Guest is running with %u.%u", __func__, guest_major, guest_minor); + INFO("%s: Host is running with %u.%u", __func__, host_major, host_minor); + + if (guest_major != host_major) { + ERROR("Host major (%d) and guest major (%d) version differ", host_major, guest_major); + } else if (guest_minor != host_minor) { + WARNING("Host minor (%d) and guest minor (%d) version differ", host_minor, guest_minor); + } + + return 0; +} + +static ApirLoadLibraryReturnCode +virtgpu_load_library(struct virtgpu *gpu) { + struct apir_encoder *encoder; + struct apir_decoder *decoder; + ApirLoadLibraryReturnCode ret; + + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); + if (!encoder) { + FATAL("%s: hypercall error: failed to prepare the remote call encoder :/", __func__); + return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; + } + + long long call_duration_ns; + + ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, + APIR_LOADLIBRARY_MAX_WAIT_MS, &call_duration_ns); + log_call_duration(call_duration_ns, "API Remoting LoadLibrary"); + + if (!decoder) { + FATAL("%s: hypercall error: failed to kick the API remoting hypercall. 
:/", __func__);
+      return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR;
+   }
+
+   remote_call_finish(gpu, encoder, decoder);
+
+   if (ret == APIR_LOAD_LIBRARY_SUCCESS) {
+      INFO("%s: The API Remoting backend was successfully loaded and initialized", __func__);
+
+      return ret;
+   }
+
+   // something wrong happened, find out what.
+
+   if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { // codes below the base index are reported as virglrenderer load errors
+      FATAL("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)",
+            __func__, apir_load_library_error(ret), ret);
+      return ret;
+   }
+
+   INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__);
+
+   ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); // peel off one base-index layer
+
+   if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) {
+      FATAL("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/",
+            __func__, apir_ret, apir_load_library_error(apir_ret));
+   } else {
+      uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; // second layer: the code returned by the backend library itself
+      FATAL("%s: the API Remoting backend library failed to initialize its backend library (apir code=%u) :/",
+            __func__, lib_ret);
+   }
+   return ret;
+}
+
+struct virtgpu * // opens the device, sets up capset/context/shared pages, then handshakes and loads the backend
+create_virtgpu() {
+   struct virtgpu *gpu = new struct virtgpu();
+
+   gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; // any value, even empty, selects the APIR capset
+
+   util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem), 1024);
+
+   if (virtgpu_open(gpu) != APIR_SUCCESS) {
+      FATAL("%s: failed to open the virtgpu device :/", __func__);
+      return NULL;
+   }
+
+   if (virtgpu_init_capset(gpu) != APIR_SUCCESS) {
+      FATAL("%s: failed to initialize the GPU capset :/", __func__);
+      return NULL;
+   }
+
+   if (virtgpu_init_context(gpu) != APIR_SUCCESS) {
+      FATAL("%s: failed to initialize the GPU context :/", __func__);
+      return NULL;
+   }
+
+   if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) {
+      FATAL("%s: failed to create the shared reply memory pages :/", __func__);
+      return NULL;
+   }
+
+   
if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) {
+      FATAL("%s: failed to create the shared data memory pages :/", __func__);
+      return NULL;
+   }
+
+   if (virtgpu_handshake(gpu)) {
+      FATAL("%s: failed to handshake with the virglrenderer library :/", __func__);
+      return NULL;
+   }
+
+   if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) {
+      FATAL("%s: failed to load the backend library :/", __func__);
+      return NULL;
+   }
+
+   return gpu;
+}
+
+static virt_gpu_result_t // tries each enumerated DRM device in turn and keeps the first one that opens successfully
+virtgpu_open(struct virtgpu *gpu)
+{
+   drmDevicePtr devs[8];
+   int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs));
+   if (count < 0) {
+      ERROR("%s: failed to enumerate DRM devices", __func__);
+      return APIR_ERROR_INITIALIZATION_FAILED;
+   }
+
+   virt_gpu_result_t result = APIR_ERROR_INITIALIZATION_FAILED; // also the result when count == 0
+   for (int i = 0; i < count; i++) {
+      result = virtgpu_open_device(gpu, devs[i]);
+      if (result == APIR_SUCCESS)
+         break;
+   }
+
+   drmFreeDevices(devs, count);
+
+   return result;
+}
+
+static virt_gpu_result_t // opens dev's render node and stores the fd in gpu->fd when the driver is virtio_gpu major 0
+virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev)
+{
+   const bool has_render_node = (dev->available_nodes & (1 << DRM_NODE_RENDER)) != 0; // nodes[] is only meaningful for available node types
+   const char *node_path = has_render_node ? dev->nodes[DRM_NODE_RENDER] : "(no render node)";
+   int fd = has_render_node ? open(node_path, O_RDWR | O_CLOEXEC) : -1;
+   if (fd < 0) {
+      MESSAGE("failed to open %s", node_path);
+      return APIR_ERROR_INITIALIZATION_FAILED;
+   }
+
+   drmVersionPtr version = drmGetVersion(fd);
+   if (!version || strcmp(version->name, "virtio_gpu") ||
+       version->version_major != 0) {
+      if (version) {
+         MESSAGE("unknown DRM driver %s version %d",
+                 version->name, version->version_major);
+      } else {
+         MESSAGE("failed to get DRM driver version");
+      }
+
+      if (version)
+         drmFreeVersion(version);
+      close(fd); // rejected device: do not leak the descriptor
+      return APIR_ERROR_INITIALIZATION_FAILED;
+   }
+
+   gpu->fd = fd;
+
+   drmFreeVersion(version);
+
+   MESSAGE("using DRM device %s", node_path);
+
+   return APIR_SUCCESS;
+}
+
+static virt_gpu_result_t
+virtgpu_init_context(struct virtgpu *gpu)
+{
+   assert(!gpu->capset.version);
+   const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id);
+   if
(ret) { + MESSAGE("failed to initialize context: %s", strerror(errno)); + return APIR_ERROR_INITIALIZATION_FAILED; + } + + return APIR_SUCCESS; +} + +static virt_gpu_result_t +virtgpu_init_capset(struct virtgpu *gpu) +{ + if (gpu->use_apir_capset) { + MESSAGE("Using the APIR capset"); + gpu->capset.id = VIRGL_RENDERER_CAPSET_APIR; + } else { + MESSAGE("Using the Venus capset"); + gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; + } + gpu->capset.version = 0; + + int ret = \ + virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, + &gpu->capset.data, sizeof(gpu->capset.data)); + + if (ret) { + MESSAGE("failed to get APIR v%d capset: %s", gpu->capset.version, strerror(errno)); + return APIR_ERROR_INITIALIZATION_FAILED; + } + + assert(gpu->capset.data.supports_blob_resources); + + return APIR_SUCCESS; +} + +static int +virtgpu_ioctl_context_init(struct virtgpu *gpu, + enum virgl_renderer_capset capset_id) +{ + struct drm_virtgpu_context_set_param ctx_set_params[3] = { + { + .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, + .value = capset_id, + }, + { + .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS, + .value = 1, + }, + { + .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK, + .value = 0, /* don't generate drm_events on fence signaling */ + }, + }; + + struct drm_virtgpu_context_init args = { + .num_params = ARRAY_SIZE(ctx_set_params), + .pad = 0, + .ctx_set_params = (uintptr_t)&ctx_set_params, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args); +} + +static int +virtgpu_ioctl_get_caps(struct virtgpu *gpu, + enum virgl_renderer_capset id, + uint32_t version, + void *capset, + size_t capset_size) +{ + struct drm_virtgpu_get_caps args = { + .cap_set_id = id, + .cap_set_ver = version, + .addr = (uintptr_t)capset, + .size = (__u32) capset_size, + .pad = 0, + }; + + return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args); +} + +static uint64_t +virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param) +{ + /* val must be zeroed because kernel only 
writes the lower 32 bits */ + uint64_t val = 0; + struct drm_virtgpu_getparam args = { + .param = param, + .value = (uintptr_t)&val, + }; + + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args); + return ret ? 0 : val; +} + + +struct apir_encoder * +remote_call_prepare( + struct virtgpu *gpu, + ApirCommandType apir_cmd_type, + int32_t cmd_flags) +{ + /* + * Prepare the command encoder and its buffer + */ + + static char encoder_buffer[4096]; + + static struct apir_encoder enc; + enc = { + encoder_buffer, + encoder_buffer, + encoder_buffer + sizeof(encoder_buffer), + }; + + /* + * Fill the command encoder with the common args: + * - cmd_type (int32_t) + * - cmd_flags (int32_t) + * - reply res id (uint32_t) + */ + + int32_t cmd_type = apir_cmd_type; + + // for testing during the hypervisor transition + if (!gpu->use_apir_capset) { + cmd_type += VENUS_COMMAND_TYPE_LENGTH; + } + apir_encode_int32_t(&enc, &cmd_type); + apir_encode_int32_t(&enc, &cmd_flags); + + uint32_t reply_res_id = gpu->reply_shmem.res_id; + apir_encode_uint32_t(&enc, &reply_res_id); + + return &enc; +} + +void +remote_call_finish( + struct virtgpu *gpu, + struct apir_encoder *enc, + struct apir_decoder *dec) { + UNUSED(gpu); + + if (!enc) { + ERROR("Invalid (null) encoder :/"); + } + + if (!dec) { + ERROR("Invalid (null) decoder :/"); + } + + // encoder and decoder are statically allocated, nothing to do to release them +} + +uint32_t +remote_call( + struct virtgpu *gpu, + struct apir_encoder *encoder, + struct apir_decoder **decoder, + float max_wait_ms, + long long *call_duration_ns) +{ + /* + * Prepare the reply notification pointer + */ + + volatile std::atomic_uint *atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr; + *atomic_reply_notif = 0; + + /* + * Trigger the execbuf ioctl + */ + + struct drm_virtgpu_execbuffer args = { + .flags = VIRTGPU_EXECBUF_RING_IDX, + .size = (uint32_t) (encoder->cur - encoder->start), + .command = (uintptr_t) 
encoder->start,
+
+      .bo_handles = 0,
+      .num_bo_handles = 0,
+
+      .fence_fd = 0,
+      .ring_idx = 0,
+      .syncobj_stride = 0,
+      .num_in_syncobjs = 0,
+      .num_out_syncobjs = 0,
+      .in_syncobjs = 0,
+      .out_syncobjs = 0,
+   };
+
+   *decoder = NULL; // stays NULL on every failure path below
+
+   int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args);
+
+   if (ret != 0) {
+      FATAL("%s: the virtgpu EXECBUFFER ioctl failed (%d) :/ \n", __func__, ret); // __func__ feeds %s, ret feeds %d
+   }
+
+   /*
+    * Wait for the response notification
+    */
+
+   start_timer(&wait_host_reply_timer);
+
+   struct timespec ts_start, ts_end;
+   clock_gettime(CLOCK_MONOTONIC, &ts_start);
+   long long start_time = (long long)ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec;
+
+   bool timedout = false;
+   uint32_t notif_value = 0;
+   while (true) { // poll the notification word at the start of the reply page until it becomes non-zero
+      notif_value = std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire);
+
+      if (notif_value != 0) {
+         break;
+      }
+
+      int64_t base_sleep_us = 15;
+
+      os_time_sleep(base_sleep_us);
+
+      if (max_wait_ms) { // max_wait_ms == 0 means wait forever
+         clock_gettime(CLOCK_MONOTONIC, &ts_end);
+         long long end_time = (long long)ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec;
+         float duration_ms = (float) ((end_time - start_time) / 1e6); // floating-point division keeps the sub-millisecond part
+
+         if (duration_ms > max_wait_ms) {
+            timedout = true;
+            break;
+         }
+      }
+   }
+
+   if (call_duration_ns) {
+      *call_duration_ns = stop_timer(&wait_host_reply_timer);
+   }
+
+   if (max_wait_ms && timedout) {
+      ERROR("timed out waiting for the host answer...");
+      return APIR_FORWARD_TIMEOUT;
+   }
+
+   /*
+    * Prepare the decoder
+    */
+   static struct apir_decoder response_dec; // single static decoder: the returned pointer is reused by the next call
+   response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif); // payload starts right after the notification word
+   response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size;
+   *decoder = &response_dec;
+
+   // extract the actual return value from the notif flag
+   uint32_t returned_value = notif_value - 1;
+   return returned_value;
+}
+
+static void log_call_duration(long long call_duration_ns, const char *name) {
+   double call_duration_ms = (double) call_duration_ns / 1e6; // 1 millisecond = 1e6
nanoseconds + double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds + + if (call_duration_s > 1) { + MESSAGE("%s: waited %.2fs for the %s host reply...", __func__, call_duration_s, name); + } else if (call_duration_ms > 1) { + MESSAGE("%s: waited %.2fms for the %s host reply...", __func__, call_duration_ms, name); + } else { + MESSAGE("%s: waited %lldns for the %s host reply...", __func__, call_duration_ns, name); + } +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h b/ggml/src/ggml-remotingfrontend/virtgpu.h new file mode 100644 index 00000000000..1f445852cd0 --- /dev/null +++ b/ggml/src/ggml-remotingfrontend/virtgpu.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "virtgpu-apir.h" +#include "virtgpu-utils.h" +#include "../ggml-remotingbackend/shared/api_remoting.h" +#include "../ggml-remotingbackend/shared/apir_cs.h" + +#include "virtgpu-shm.h" + +#define VIRGL_RENDERER_UNSTABLE_APIS 1 +#include "drm-uapi/virtgpu_drm.h" +#include "apir_hw.h" +#include "venus_hw.h" + +// must match https://gitlab.freedesktop.org/kpouget/virglrenderer/-/blob/main/src/virglrenderer_hw.h?ref_type=heads +enum virgl_renderer_capset { + VIRGL_RENDERER_CAPSET_VIRGL = 1, + VIRGL_RENDERER_CAPSET_VIRGL2 = 2, + /* 3 is reserved for gfxstream */ + VIRGL_RENDERER_CAPSET_VENUS = 4, + /* 5 is reserved for cross-domain */ + VIRGL_RENDERER_CAPSET_DRM = 6, + + VIRGL_RENDERER_CAPSET_APIR = 10, +}; + +#define VENUS_COMMAND_TYPE_LENGTH 331 + +/* from src/virtio/vulkan/vn_renderer_virtgpu.c */ +#define VIRTGPU_PCI_VENDOR_ID 0x1af4 +#define VIRTGPU_PCI_DEVICE_ID 0x1050 +#define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004 +#define VIRTGPU_PARAM_GUEST_VRAM 9 + +#define SHMEM_DATA_SIZE 0x1830000 // 24MiB +#define SHMEM_REPLY_SIZE 0x4000 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +typedef enum virt_gpu_result_t { + APIR_SUCCESS = 0, + APIR_ERROR_INITIALIZATION_FAILED = -1, +} 
virt_gpu_result_t; + +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) + +struct virtgpu { + struct remoting_dev_instance *instance; + + bool use_apir_capset; + + int fd; + + struct { + enum virgl_renderer_capset id; + uint32_t version; + struct virgl_renderer_capset_apir data; + } capset; + + struct util_sparse_array shmem_array; + + /* APIR communication pages */ + struct virtgpu_shmem reply_shmem; + struct virtgpu_shmem data_shmem; +}; + + +static inline int +virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args) +{ + return drmIoctl(gpu->fd, request, args); +} + +struct virtgpu *create_virtgpu(); + +struct apir_encoder *remote_call_prepare( + struct virtgpu *gpu, + ApirCommandType apir_cmd_type, + int32_t cmd_flags); + +uint32_t remote_call( + struct virtgpu *gpu, + struct apir_encoder *enc, + struct apir_decoder **dec, + float max_wait_ms, + long long *call_duration_ns + ); + +void remote_call_finish( + struct virtgpu *gpu, + struct apir_encoder *enc, + struct apir_decoder *dec); From 8a8d0673f803a5d606c97c657a3bb99bf5f40c4a Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 9 Jan 2026 14:08:23 +0100 Subject: [PATCH 03/37] ggml-remotingbackend: host-side backend for Virglrenderer APIR component --- ggml/src/ggml-remotingbackend/CMakeLists.txt | 20 + .../apir_cs_ggml-rpc-back.cpp | 118 +++++ .../ggml-remotingbackend/backend-convert.h | 15 + .../backend-dispatched-backend.cpp | 72 +++ .../backend-dispatched-buffer-type.cpp | 96 ++++ .../backend-dispatched-buffer.cpp | 165 +++++++ .../backend-dispatched-device.cpp | 153 ++++++ .../backend-dispatched.cpp | 45 ++ .../backend-dispatched.gen.h | 108 +++++ .../ggml-remotingbackend/backend-dispatched.h | 18 + .../ggml-remotingbackend/backend-internal.h | 22 + .../ggml-remotingbackend/backend-utils.cpp | 0 ggml/src/ggml-remotingbackend/backend-utils.h | 61 +++ ggml/src/ggml-remotingbackend/backend.cpp | 141 ++++++ .../shared/api_remoting.h | 86 ++++ .../shared/apir_backend.gen.h | 
36 ++ .../shared/apir_backend.h | 100 ++++ .../src/ggml-remotingbackend/shared/apir_cs.h | 450 ++++++++++++++++++ .../shared/apir_cs_ggml.h | 234 +++++++++ .../ggml-remotingbackend/shared/apir_cs_rpc.h | 45 ++ 20 files changed, 1985 insertions(+) create mode 100644 ggml/src/ggml-remotingbackend/CMakeLists.txt create mode 100644 ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-convert.h create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched.gen.h create mode 100644 ggml/src/ggml-remotingbackend/backend-dispatched.h create mode 100644 ggml/src/ggml-remotingbackend/backend-internal.h create mode 100644 ggml/src/ggml-remotingbackend/backend-utils.cpp create mode 100644 ggml/src/ggml-remotingbackend/backend-utils.h create mode 100644 ggml/src/ggml-remotingbackend/backend.cpp create mode 100644 ggml/src/ggml-remotingbackend/shared/api_remoting.h create mode 100644 ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h create mode 100644 ggml/src/ggml-remotingbackend/shared/apir_backend.h create mode 100644 ggml/src/ggml-remotingbackend/shared/apir_cs.h create mode 100644 ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h create mode 100644 ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h diff --git a/ggml/src/ggml-remotingbackend/CMakeLists.txt b/ggml/src/ggml-remotingbackend/CMakeLists.txt new file mode 100644 index 00000000000..c96d5332dda --- /dev/null +++ b/ggml/src/ggml-remotingbackend/CMakeLists.txt @@ -0,0 +1,20 @@ +cmake_minimum_required(VERSION 3.19) +cmake_policy(SET CMP0114 NEW) + 
+message(STATUS "Enable API Remoting backend") + +ggml_add_backend_library(ggml-remotingbackend + backend.cpp + backend-dispatched.cpp + backend-dispatched-backend.cpp + backend-dispatched-device.cpp + backend-dispatched-buffer.cpp + backend-dispatched-buffer-type.cpp + backend-utils.cpp + shared/api_remoting.h + shared/apir_backend.h + shared/apir_cs.h + apir_cs_ggml-rpc-back.cpp + ) + +target_compile_options(ggml-remotingbackend PRIVATE -std=c++20) diff --git a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp new file mode 100644 index 00000000000..f4de35564a8 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp @@ -0,0 +1,118 @@ +#include +#include +#include +#include + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "shared/apir_cs_rpc.h" + +std::unordered_set backend_buffers; + +void +apir_track_backend_buffer(ggml_backend_buffer_t buffer) { + backend_buffers.insert(buffer); +} + +bool +apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) { + auto it = backend_buffers.find(buffer); + if (it == backend_buffers.end()) { + return false; + } + + backend_buffers.erase(it); + return true; +} + +std::unordered_set +apir_get_track_backend_buffers() { + return backend_buffers; +} + +ggml_tensor * +apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor) { + ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, + tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); + for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) { + result->nb[i] = tensor->nb[i]; + } + result->buffer = reinterpret_cast(tensor->buffer); + if (result->buffer && backend_buffers.find(result->buffer) == backend_buffers.end()) { + printf("WARNING: HOST BUFFER NOT FOUND | %p\n", (void *)result->buffer); + result->buffer = nullptr; + } + + uint64_t tensor_data = tensor->data; + if (result->buffer) { + // require that the tensor data does not go beyond 
the buffer end + uint64_t tensor_size = (uint64_t) ggml_nbytes(result); + uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer); + uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer); + + // tensor->data is serialized as an offset to the buffer base address + tensor_data += buffer_start; + + GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow + GGML_ASSERT(tensor_data >= buffer_start && tensor_data + tensor_size <= buffer_start + buffer_size); + } + + result->op = (ggml_op) tensor->op; + for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) { + result->op_params[i] = tensor->op_params[i]; + } + result->flags = tensor->flags; + result->data = reinterpret_cast(tensor_data); + ggml_set_name(result, tensor->name); + return result; +} + +ggml_tensor * +apir_create_node(uint64_t id, + struct ggml_context * ctx, + const std::unordered_map & tensor_ptrs, + std::unordered_map & tensor_map) { + if (id == 0) { + return nullptr; + } + if (tensor_map.find(id) != tensor_map.end()) { + return tensor_map[id]; + } + const apir_rpc_tensor * tensor = tensor_ptrs.at(id); + struct ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); + if (result == nullptr) { + return nullptr; + } + tensor_map[id] = result; + for (int i = 0; i < GGML_MAX_SRC; i++) { + result->src[i] = apir_create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map); + } + result->view_src = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); + result->view_offs = tensor->view_offs; + return result; +} + +ggml_cgraph * +apir_deserialize_graph(uint32_t n_nodes, uint32_t n_tensors, const apir_rpc_tensor * tensors, const uint64_t * nodes) { + size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); + struct ggml_init_params params = { + /*.mem_size =*/ buf_size, + /*.mem_buffer =*/ NULL, + /*.no_alloc =*/ true, + }; + struct ggml_context * ctx = 
ggml_init(params); + struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); + graph->n_nodes = n_nodes; + std::unordered_map tensor_ptrs; + for (uint32_t i = 0; i < n_tensors; i++) { + tensor_ptrs[tensors[i].id] = &tensors[i]; + } + std::unordered_map tensor_map; + for (uint32_t i = 0; i < n_nodes; i++) { + int64_t id; + memcpy(&id, &nodes[i], sizeof(id)); + graph->nodes[i] = apir_create_node(id, ctx, tensor_ptrs, tensor_map); + } + + return graph; +} diff --git a/ggml/src/ggml-remotingbackend/backend-convert.h b/ggml/src/ggml-remotingbackend/backend-convert.h new file mode 100644 index 00000000000..b45c2784160 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-convert.h @@ -0,0 +1,15 @@ +#include "shared/apir_backend.h" + +#define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name) + +static inline apir_buffer_host_handle_t +ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { + // in the backend, the buffer handle is the buffer pointer + return (apir_buffer_host_handle_t) buffer; +} + +static inline apir_buffer_type_host_handle_t +ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) { + // in the backend, the buffer handle is the buffer pointer + return (apir_buffer_type_host_handle_t) buft; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp new file mode 100644 index 00000000000..ca9cd7a5a2e --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -0,0 +1,72 @@ +#include +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +#include "shared/apir_backend.h" + +struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"}; + +uint32_t +backend_backend_graph_compute(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(enc); + + static bool 
async_backend_initialized = false; + static bool async_backend; + + if (!async_backend_initialized) { + struct ggml_backend_dev_props props; + + dev->iface.get_props(dev, &props); + async_backend = props.caps.async; + async_backend_initialized = true; + } + + start_timer(&graph_compute_timer); + + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + + const void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + if (!shmem_data) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } + size_t cgraph_size; + apir_decode_size_t(dec, &cgraph_size); + + struct apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); + + ggml_cgraph *cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); + + ggml_status status; +#if APIR_BACKEND_CHECK_SUPPORTS_OP == 1 + for (int idx = 0; idx < cgraph->n_nodes; idx++) { + ggml_tensor *op = ggml_graph_node(cgraph, idx); + if (dev->iface.supports_op(dev, op)) { + continue; + } + ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); + + status = GGML_STATUS_ABORTED; + apir_encode_ggml_status(enc, &status); + + stop_timer(&graph_compute_timer); + return 0; + } +#endif + status = bck->iface.graph_compute(bck, cgraph); + + if (async_backend) { + bck->iface.synchronize(bck); + } + + apir_encode_ggml_status(enc, &status); + + stop_timer(&graph_compute_timer); + + return 0; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp new file mode 100644 index 00000000000..4fba10ed623 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp @@ -0,0 +1,96 @@ +#include +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +uint32_t +backend_buffer_type_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct 
virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_type_t buft; + buft = apir_decode_ggml_buffer_type(dec); + + const char *string = buft->iface.get_name(buft); + + const size_t string_size = strlen(string) + 1; + apir_encode_array_size(enc, string_size); + apir_encode_char_array(enc, string, string_size); + + return 0; +} + +uint32_t +backend_buffer_type_get_alignment(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_type_t buft; + buft = apir_decode_ggml_buffer_type(dec); + + size_t value = buft->iface.get_alignment(buft); + apir_encode_size_t(enc, &value); + + return 0; +} + +uint32_t +backend_buffer_type_get_max_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_type_t buft; + buft = apir_decode_ggml_buffer_type(dec); + + size_t value = buft->iface.get_max_size(buft); + apir_encode_size_t(enc, &value); + + return 0; +} + +uint32_t +backend_buffer_type_is_host(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_type_t buft; + buft = apir_decode_ggml_buffer_type(dec); + + bool is_host = buft->iface.is_host(buft); + apir_encode_bool_t(enc, &is_host); + + return 0; +} + +uint32_t +backend_buffer_type_alloc_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + + ggml_backend_buffer_type_t buft; + buft = apir_decode_ggml_buffer_type(dec); + + size_t size; + apir_decode_size_t(dec, &size); + + ggml_backend_buffer_t buffer; + + buffer = buft->iface.alloc_buffer(buft, size); + + apir_encode_ggml_buffer(enc, buffer); + + if (buffer) { + apir_track_backend_buffer(buffer); + } + + return 0; +} + +uint32_t +backend_buffer_type_get_alloc_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_type_t buft; + buft = 
apir_decode_ggml_buffer_type(dec); + + const ggml_tensor *op = apir_decode_ggml_tensor_inplace(dec); + + size_t value = buft->iface.get_alloc_size(buft, op); + + apir_encode_size_t(enc, &value); + + return 0; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp new file mode 100644 index 00000000000..0228241bf1a --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -0,0 +1,165 @@ +#include +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +struct timer_data get_tensor_timer = {0, 0, 0, "get_tensor"}; +struct timer_data set_tensor_timer = {0, 0, 0, "set_tensor"}; +struct timer_data cpy_tensor_timer = {0, 0, 0, "set_tensor"}; + +uint32_t +backend_buffer_get_base(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + + uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer); + apir_encode_uintptr_t(enc, &base); + + return 0; +} + +uint32_t +backend_buffer_set_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(enc); + + start_timer(&set_tensor_timer); + + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + + ggml_tensor *tensor; + // safe to remove the const qualifier here + tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); + + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + + size_t offset; + apir_decode_size_t(dec, &offset); + + size_t size; + apir_decode_size_t(dec, &size); + + void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + + if (!shmem_data) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } + +#if 0 + INFO("Calling (%p)->set_tensor(tensor=%p, data=%p, 
offset=%lu, size=%lu", + buffer, tensor, shmem_data, offset, size); +#endif +#if 0 + void **addr = (void **)(uintptr_t) shmem_data; + for (int i = 0; i <= 10; i++) { + INFO("%s: %p | %llx", __func__, addr, *addr); + addr++; + } + INFO("\n"); +#endif + + buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size); + + stop_timer(&set_tensor_timer); + + return 0; +} + +uint32_t +backend_buffer_get_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(enc); + + start_timer(&get_tensor_timer); + + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + + + const ggml_tensor *tensor; + // safe to remove the const qualifier here + tensor = apir_decode_ggml_tensor(dec); + + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + + size_t offset; + apir_decode_size_t(dec, &offset); + + size_t size; + apir_decode_size_t(dec, &size); + + void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + if (!shmem_data) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } + + buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size); + + stop_timer(&get_tensor_timer); + + return 0; +} + +uint32_t +backend_buffer_cpy_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + + start_timer(&cpy_tensor_timer); + + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + INFO("%s <---->", __func__); + const ggml_tensor *src; + // safe to remove the const qualifier here + src = apir_decode_ggml_tensor(dec); + ggml_tensor* dst = (ggml_tensor*)(uintptr_t) apir_decode_ggml_tensor(dec); + + bool ret = buffer->iface.cpy_tensor(buffer, src, (ggml_tensor*)dst); + + apir_encode_bool_t(enc, &ret); + + stop_timer(&cpy_tensor_timer); + + return 0; +} + +uint32_t +backend_buffer_clear(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + 
UNUSED(enc); + + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + + uint8_t value; + apir_decode_uint8_t(dec, &value); + + buffer->iface.clear(buffer, value); + + return 0; +} + +uint32_t +backend_buffer_free_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(enc); + + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + + if (!apir_untrack_backend_buffer(buffer)) { + WARNING("%s: unknown buffer %p", __func__, (void *) buffer); + return 1; + } + + buffer->iface.free_buffer(buffer); + + return 0; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp new file mode 100644 index 00000000000..b507832ce16 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -0,0 +1,153 @@ +#include +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +uint32_t backend_device_get_device_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(ctx); + UNUSED(dec); + + int32_t dev_count = reg->iface.get_device_count(reg); + apir_encode_int32_t(enc, &dev_count); + + return 0; +} + +uint32_t backend_device_get_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(ctx); + UNUSED(dec); + + int32_t dev_count = reg->iface.get_device_count(reg); + apir_encode_int32_t(enc, &dev_count); + + return 0; +} + +uint32_t backend_device_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + const char *string = dev->iface.get_name(dev); + + const size_t string_size = strlen(string) + 1; + apir_encode_array_size(enc, string_size); + apir_encode_char_array(enc, string, string_size); + + return 0; +} + +uint32_t
+backend_device_get_description(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + const char *string = dev->iface.get_description(dev); + + const size_t string_size = strlen(string) + 1; + apir_encode_array_size(enc, string_size); + apir_encode_char_array(enc, string, string_size); + + return 0; +} + +uint32_t +backend_device_get_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + uint32_t type = dev->iface.get_type(dev); + apir_encode_uint32_t(enc, &type); + + return 0; +} + +uint32_t +backend_device_get_memory(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + size_t free, total; + dev->iface.get_memory(dev, &free, &total); + + apir_encode_size_t(enc, &free); + apir_encode_size_t(enc, &total); + + return 0; +} + +uint32_t +backend_device_supports_op(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + + const ggml_tensor *op = apir_decode_ggml_tensor_inplace(dec); + + bool supports_op = dev->iface.supports_op(dev, op); + + apir_encode_bool_t(enc, &supports_op); + + return 0; +} + +uint32_t +backend_device_get_buffer_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + ggml_backend_buffer_type_t bufft = dev->iface.get_buffer_type(dev); + + apir_encode_ggml_buffer_type(enc, bufft); + + return 0; +} + +uint32_t +backend_device_get_props(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + struct ggml_backend_dev_props props; + dev->iface.get_props(dev, &props); + + apir_encode_bool_t(enc, &props.caps.async); + apir_encode_bool_t(enc, &props.caps.host_buffer); + apir_encode_bool_t(enc, &props.caps.buffer_from_host_ptr); + apir_encode_bool_t(enc, &props.caps.events); + + 
return 0; +} + +uint32_t +backend_device_buffer_from_ptr(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { + UNUSED(ctx); + UNUSED(dec); + + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + + void *shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + if (!shmem_ptr) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } + + size_t size; + apir_decode_size_t(dec, &size); + size_t max_tensor_size; + apir_decode_size_t(dec, &max_tensor_size); + + ggml_backend_buffer_t buffer; + buffer = dev->iface.buffer_from_host_ptr(dev, shmem_ptr, size, max_tensor_size); + + apir_encode_ggml_buffer(enc, buffer); + apir_encode_ggml_buffer_type(enc, buffer ? buffer->buft : NULL); // buffer may be NULL, as the check below anticipates + + if (buffer) { + apir_track_backend_buffer(buffer); + } + + return 0; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp new file mode 100644 index 00000000000..fca41e8a7d9 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -0,0 +1,45 @@ +#include +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" + +ggml_backend_reg_t reg = NULL; +ggml_backend_dev_t dev = NULL; +ggml_backend_t bck = NULL; + +long long timer_start = 0; +long long timer_total = 0; +long long timer_count = 0; + +uint32_t backend_dispatch_initialize(void *ggml_backend_reg_fct_p, void *ggml_backend_init_fct_p) { + if (reg != NULL) { + FATAL("%s: already initialized :/", __func__); + } + ggml_backend_reg_t (* ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; + + reg = ggml_backend_reg_fct(); + if (reg == NULL) { + FATAL("%s: backend registration failed :/", __func__); + } + + if (reg->iface.get_device_count(reg)) { + dev = reg->iface.get_device(reg, 0); + } + + ggml_backend_t (* ggml_backend_fct)(int) = (ggml_backend_t (*)(int))
ggml_backend_init_fct_p; + + bck = ggml_backend_fct(0); + if (!bck) { + ERROR("%s: backend initialization failed :/", __func__); + return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; + } + + size_t free = 0, total = 0; + if (dev) { dev->iface.get_memory(dev, &free, &total); } // dev stays NULL when the registry reports no device + INFO("%s: free memory: %zu MB", __func__, (size_t) free/1024/1024); + + return APIR_BACKEND_INITIALIZE_SUCCESS; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h new file mode 100644 index 00000000000..2160cca9f65 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h @@ -0,0 +1,108 @@ +#pragma once + + +/* device */ +uint32_t backend_device_get_device_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_description(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_memory(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_supports_op(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_buffer_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_props(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_buffer_from_ptr(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +/* buffer-type */ +uint32_t backend_buffer_type_get_name(struct apir_encoder *enc, struct
apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_get_alignment(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_get_max_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_is_host(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +/* buffer */ +uint32_t backend_buffer_get_base(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_set_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_get_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_cpy_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_clear(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_free_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +/* backend */ +uint32_t backend_backend_graph_compute(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +static inline const char *backend_dispatch_command_name(ApirBackendCommandType type) +{ + switch (type) { + /* device */ + case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT: return "backend_device_get_device_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_COUNT: return "backend_device_get_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_NAME: return "backend_device_get_name"; + case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION: return 
"backend_device_get_description"; + case APIR_COMMAND_TYPE_DEVICE_GET_TYPE: return "backend_device_get_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY: return "backend_device_get_memory"; + case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP: return "backend_device_supports_op"; + case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE: return "backend_device_get_buffer_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_PROPS: return "backend_device_get_props"; + case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR: return "backend_device_buffer_from_ptr"; + /* buffer-type */ + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME: return "backend_buffer_type_get_name"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT: return "backend_buffer_type_get_alignment"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: return "backend_buffer_type_get_max_size"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: return "backend_buffer_type_is_host"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: return "backend_buffer_type_alloc_buffer"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: return "backend_buffer_type_get_alloc_size"; + /* buffer */ + case APIR_COMMAND_TYPE_BUFFER_GET_BASE: return "backend_buffer_get_base"; + case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR: return "backend_buffer_set_tensor"; + case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR: return "backend_buffer_get_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR: return "backend_buffer_cpy_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CLEAR: return "backend_buffer_clear"; + case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER: return "backend_buffer_free_buffer"; + /* backend */ + case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE: return "backend_backend_graph_compute"; + + default: return "unknown"; + } +} + +extern "C" { +static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = { + + /* device */ + + /* APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = */ backend_device_get_device_count, + /* APIR_COMMAND_TYPE_DEVICE_GET_COUNT = */ 
backend_device_get_count, + /* APIR_COMMAND_TYPE_DEVICE_GET_NAME = */ backend_device_get_name, + /* APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = */ backend_device_get_description, + /* APIR_COMMAND_TYPE_DEVICE_GET_TYPE = */ backend_device_get_type, + /* APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = */ backend_device_get_memory, + /* APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = */ backend_device_supports_op, + /* APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = */ backend_device_get_buffer_type, + /* APIR_COMMAND_TYPE_DEVICE_GET_PROPS = */ backend_device_get_props, + /* APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = */ backend_device_buffer_from_ptr, + + /* buffer-type */ + + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size, + + /* buffer */ + + /* APIR_COMMAND_TYPE_BUFFER_GET_BASE = */ backend_buffer_get_base, + /* APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = */ backend_buffer_set_tensor, + /* APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = */ backend_buffer_get_tensor, + /* APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = */ backend_buffer_cpy_tensor, + /* APIR_COMMAND_TYPE_BUFFER_CLEAR = */ backend_buffer_clear, + /* APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = */ backend_buffer_free_buffer, + + /* backend */ + + /* APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = */ backend_backend_graph_compute, +}; +} diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h new file mode 100644 index 00000000000..2921c760967 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -0,0 +1,18 @@ +#pragma once + 
+#include +#include + +#include + +#include "backend-utils.h" +#include "backend-convert.h" +#include "shared/apir_backend.h" +#include "shared/apir_cs.h" +#include "shared/apir_cs_ggml.h" + +typedef uint32_t (*backend_dispatch_t)(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +#include "backend-dispatched.gen.h" + +uint32_t backend_dispatch_initialize(void *ggml_backend_reg_fct_p, void *ggml_backend_init_fct_p); diff --git a/ggml/src/ggml-remotingbackend/backend-internal.h b/ggml/src/ggml-remotingbackend/backend-internal.h new file mode 100644 index 00000000000..0b9dcc72364 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-internal.h @@ -0,0 +1,22 @@ +#include +#include +#include + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" +#include "shared/api_remoting.h" + +extern ggml_backend_reg_t reg; +extern ggml_backend_dev_t dev; +extern ggml_backend_t bck; + +extern "C" { + ApirLoadLibraryReturnCode apir_backend_initialize(); + void apir_backend_deinit(void); + uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx, + char *dec_cur, const char *dec_end, + char *enc_cur, const char *enc_end, + char **enc_cur_after); +} + diff --git a/ggml/src/ggml-remotingbackend/backend-utils.cpp b/ggml/src/ggml-remotingbackend/backend-utils.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ggml/src/ggml-remotingbackend/backend-utils.h b/ggml/src/ggml-remotingbackend/backend-utils.h new file mode 100644 index 00000000000..d3abf3a2ace --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend-utils.h @@ -0,0 +1,61 @@ +#pragma once + +#include +#include +#include + +#include + +#define UNUSED GGML_UNUSED +#define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE" + +static FILE * +get_log_dest(void) +{ + static FILE *dest = NULL; + if (dest) { + return dest; + } + const char *apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); + if 
(!apir_log_to_file) { + dest = stderr; + return dest; + } + + dest = fopen(apir_log_to_file, "w"); + + return dest; +} + +#define APIR_VA_PRINT(prefix, format) \ + do { \ + FILE *dest = get_log_dest(); \ + fprintf(dest, prefix); \ + va_list argptr; \ + va_start(argptr, format); \ + vfprintf(dest, format, argptr); \ + fprintf(dest, "\n"); \ + va_end(argptr); \ + fflush(dest); \ + } while (0) + +inline void +INFO(const char *format, ...) { + APIR_VA_PRINT("INFO: ", format); +} + +inline void +WARNING(const char *format, ...) { + APIR_VA_PRINT("WARNING: ", format); +} + +inline void +ERROR(const char *format, ...) { + APIR_VA_PRINT("ERROR: ", format); +} + +[[noreturn]] inline void +FATAL(const char *format, ...) { + APIR_VA_PRINT("FATAL: ", format); + abort(); +} diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp new file mode 100644 index 00000000000..0b54b3e74b9 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -0,0 +1,141 @@ +#include +#include + +#include + +#include "backend-utils.h" +#include "backend-internal.h" +#include "backend-dispatched.h" + +#include "shared/api_remoting.h" +#include "shared/apir_backend.h" +#include "shared/apir_cs.h" + +#define GGML_BACKEND_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH" +#define GGML_BACKEND_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" +#define GGML_BACKEND_LIBRARY_INIT_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_INIT" + +static void *backend_library_handle = NULL; + +extern "C" { + void apir_backend_deinit(void) { + auto buffers = apir_get_track_backend_buffers(); + for (const auto& buffer: buffers) { + apir_untrack_backend_buffer(buffer); + buffer->iface.free_buffer(buffer); + } + + if (dev) { + size_t free, total; + dev->iface.get_memory(dev, &free, &total); + INFO("%s: free memory: %ld MB", __func__, (size_t) free/1024/1024); + } + + show_timer(&graph_compute_timer); + show_timer(&set_tensor_timer); + show_timer(&get_tensor_timer); + /* *** */ + + if 
(backend_library_handle) { + INFO("%s: The GGML backend library was loaded. Unloading it.", __func__); + dlclose(backend_library_handle); + } + + INFO("%s: bye-bye", __func__); + } + + ApirLoadLibraryReturnCode apir_backend_initialize() { + const char* dlsym_error; + + const char* library_name = getenv(GGML_BACKEND_LIBRARY_PATH_ENV); + const char* library_reg = getenv(GGML_BACKEND_LIBRARY_REG_ENV); + const char* library_init = getenv(GGML_BACKEND_LIBRARY_INIT_ENV); + + INFO("%s: loading %s (%s|%s)", __func__, library_name ? library_name : "(null)", library_reg ? library_reg : "(null)", library_init ? library_init : "(null)"); + + if (!library_name) { + ERROR("cannot open the GGML library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV); + + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + } + + backend_library_handle = dlopen(library_name, RTLD_LAZY); + + if (!backend_library_handle) { + ERROR("cannot open the GGML library: %s", dlerror()); + + return APIR_LOAD_LIBRARY_CANNOT_OPEN; + } + + if (!library_reg) { + ERROR("cannot register the GGML library: env var '%s' not defined", GGML_BACKEND_LIBRARY_REG_ENV); + + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + } + + void *ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); + dlsym_error = dlerror(); + if (dlsym_error) { + ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", + library_reg, GGML_BACKEND_LIBRARY_REG_ENV, dlsym_error); + + return APIR_LOAD_LIBRARY_SYMBOL_MISSING; + } + + if (!library_init) { + ERROR("cannot initialize the GGML library: env var '%s' not defined", GGML_BACKEND_LIBRARY_INIT_ENV); + + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + } + + void *ggml_backend_init_fct = dlsym(backend_library_handle, library_init); + dlsym_error = dlerror(); + if (dlsym_error) { + ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", + library_init, GGML_BACKEND_LIBRARY_INIT_ENV, dlsym_error); + + return APIR_LOAD_LIBRARY_SYMBOL_MISSING; + } + + uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct, ggml_backend_init_fct); + + return
(ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret); + } + + uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx, + char *dec_cur, const char *dec_end, + char *enc_cur, const char *enc_end, + char **enc_cur_after) { + struct apir_encoder _enc = { + .cur = enc_cur, + .start = enc_cur, + .end = enc_end, + }; + struct apir_encoder *enc = &_enc; + + struct apir_decoder _dec = { + .cur = dec_cur, + .end = dec_end, + }; + struct apir_decoder *dec = &_dec; + + + if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { + ERROR("Received an invalid dispatch index (%d >= %d)\n", + cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); + return APIR_BACKEND_FORWARD_INDEX_INVALID; + } + +#if 0 + static long long count = 0; + INFO("[%lld] Calling %s", count, backend_dispatch_command_name((ApirBackendCommandType) cmd_type)); + count += 1; +#endif + backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type]; + uint32_t ret = forward_fct(enc, dec, ctx); + + *enc_cur_after = enc->cur; + + return ret; + } +} diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h b/ggml/src/ggml-remotingbackend/shared/api_remoting.h new file mode 100644 index 00000000000..c7e4ed25145 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/api_remoting.h @@ -0,0 +1,86 @@ +#pragma once + +/* the rest of this file must match virglrenderer/src/apir-protocol.h */ + +#include + +#define APIR_PROTOCOL_MAJOR 0 +#define APIR_PROTOCOL_MINOR 1 + +#define APIR_HANDSHAKE_MAGIC 0xab1e + +typedef enum { + APIR_COMMAND_TYPE_HandShake = 0, + APIR_COMMAND_TYPE_LoadLibrary = 1, + APIR_COMMAND_TYPE_Forward = 2, + + APIR_COMMAND_TYPE_LENGTH = 3, +} ApirCommandType; + +typedef uint64_t ApirCommandFlags; + +typedef enum { + APIR_LOAD_LIBRARY_SUCCESS = 0, + APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1, + APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, + APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, + APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, + APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, 
+ APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code +} ApirLoadLibraryReturnCode; + +typedef enum { + APIR_FORWARD_SUCCESS = 0, + APIR_FORWARD_NO_DISPATCH_FCT = 1, + APIR_FORWARD_TIMEOUT = 2, + + APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code +} ApirForwardReturnCode; + +__attribute__((unused)) +static inline const char *apir_command_name(ApirCommandType type) +{ + switch (type) { + case APIR_COMMAND_TYPE_HandShake: return "HandShake"; + case APIR_COMMAND_TYPE_LoadLibrary: return "LoadLibrary"; + case APIR_COMMAND_TYPE_Forward: return "Forward"; + default: return "unknown"; + } +} + +__attribute__((unused)) +static const char *apir_load_library_error(ApirLoadLibraryReturnCode code) { +#define APIR_LOAD_LIBRARY_ERROR(code_name) \ + do { \ + if (code == code_name) return #code_name; \ + } while (0) \ + + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX); + + return "Unknown APIR_COMMAND_TYPE_LoadLibrary error"; + +#undef APIR_LOAD_LIBRARY_ERROR +} + +__attribute__((unused)) +static const char *apir_forward_error(ApirForwardReturnCode code) { +#define APIR_FORWARD_ERROR(code_name) \ + do { \ + if (code == code_name) return #code_name; \ + } while (0) \ + + APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS); + APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT); + APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); + APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); + + return "Unknown APIR_COMMAND_TYPE_Forward error"; + +#undef APIR_FORWARD_ERROR +} diff --git 
a/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h new file mode 100644 index 00000000000..572836db487 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h @@ -0,0 +1,36 @@ +typedef enum ApirBackendCommandType { + + /* device */ + APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = 0, + APIR_COMMAND_TYPE_DEVICE_GET_COUNT = 1, + APIR_COMMAND_TYPE_DEVICE_GET_NAME = 2, + APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = 3, + APIR_COMMAND_TYPE_DEVICE_GET_TYPE = 4, + APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = 5, + APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = 6, + APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = 7, + APIR_COMMAND_TYPE_DEVICE_GET_PROPS = 8, + APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = 9, + + /* buffer-type */ + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = 10, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = 11, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = 12, + APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = 13, + APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = 14, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = 15, + + /* buffer */ + APIR_COMMAND_TYPE_BUFFER_GET_BASE = 16, + APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = 17, + APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = 18, + APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = 19, + APIR_COMMAND_TYPE_BUFFER_CLEAR = 20, + APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = 21, + + /* backend */ + APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = 22, + + // last command_type index + 1 + APIR_BACKEND_DISPATCH_TABLE_COUNT = 23, +} ApirBackendCommandType; diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h new file mode 100644 index 00000000000..5ba183439d8 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -0,0 +1,100 @@ +#pragma once + +#include // for struct timespec, clock_gettime +#include // for uintptr_t + +#include "apir_backend.gen.h" + +#define APIR_BACKEND_INITIALIZE_SUCCESS 0 +#define 
APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1 +#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2 +#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3 +#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4 + +#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5 +// new entries here need to be added to the apir_backend_initialize_error function below + +#define APIR_BACKEND_FORWARD_INDEX_INVALID 6 + +// 0 is fast, 1 avoids the backend to crash if an unsupported tensor is received +#define APIR_BACKEND_CHECK_SUPPORTS_OP 0 + +typedef uintptr_t apir_buffer_type_host_handle_t; +typedef uintptr_t apir_buffer_host_handle_t; + +struct virgl_opaque_context; + +struct virgl_apir_callbacks { + void *(*get_shmem_ptr)(struct virgl_opaque_context *ctx, uint32_t res_id); +}; + +struct virgl_apir_context { + struct virgl_opaque_context *virgl_ctx; + + struct virgl_apir_callbacks iface; +}; + +struct timer_data { + long long start; + long long total; + long long count; + const char *name; +}; + +extern struct timer_data graph_compute_timer; +extern struct timer_data get_tensor_timer; +extern struct timer_data set_tensor_timer; +extern struct timer_data cpy_tensor_timer; +extern struct timer_data wait_host_reply_timer; +extern struct timer_data get_tensor_from_ptr_timer; +extern struct timer_data set_tensor_from_ptr_timer; + +static inline void start_timer(struct timer_data *timer) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + timer->start = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +// returns the duration in ns +static inline long long stop_timer(struct timer_data *timer) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + long long timer_end = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec; + + long long duration = (timer_end - timer->start); + timer->total += duration; + timer->count += 1; + + return duration; +} + +static inline void show_timer(struct timer_data *timer) { + double ms = timer->total/1000000; + 
double itl = ms/timer->count; + double speed = 1/itl * 1000; + + if (!timer->total) { + return; + } + + INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s (%4.2f ms/call)", + timer->name, ms, timer->count, itl, speed, ms/timer->count); +} + +static const char *apir_backend_initialize_error(int code) { +#define APIR_BACKEND_INITIALIZE_ERROR(code_name) \ + do { \ + if (code == code_name) return #code_name; \ + } while (0) \ + + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED); + + return "Unknown APIR_BACKEND_INITIALIZE error:/"; + +#undef APIR_BACKEND_INITIALIZE_ERROR +} diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-remotingbackend/shared/apir_cs.h new file mode 100644 index 00000000000..22e954305ce --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs.h @@ -0,0 +1,450 @@ +#pragma once + +#include +#include + +// needs UNUSED to be defined +// needs FATAL to be defined + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +struct apir_encoder { + char* cur; + const char *start; + const char* end; +}; + +struct apir_decoder { + const char* cur; + const char* end; +}; + +/* + * new encoder and decoder + */ + +static struct apir_decoder +apir_new_decoder(const char *ptr, size_t size) { + struct apir_decoder dec = { + .cur = ptr, + .end = ptr + size, + }; + + return dec; +} + +static struct apir_encoder +apir_new_encoder(char *ptr, size_t size) { + struct apir_encoder enc = { + .cur = ptr, + .start = ptr, + .end = ptr + size, + }; + + return enc; 
+} + +/* + * encode peek + */ + +static inline bool +apir_decoder_peek_internal(const struct apir_decoder *dec, + size_t size, + void *val, + size_t val_size) +{ + assert(val_size <= size); + + if (unlikely(size > (size_t) (dec->end - dec->cur))) { + FATAL("READING TOO MUCH FROM THE DECODER :/"); + //apir_decoder_set_fatal(dec); + memset(val, 0, val_size); + return false; + } + + /* we should not rely on the compiler to optimize away memcpy... */ + memcpy(val, dec->cur, val_size); + return true; +} + +static inline void +apir_decoder_peek(const struct apir_decoder *dec, + size_t size, + void *val, + size_t val_size) +{ + apir_decoder_peek_internal(dec, size, val, val_size); +} + +static inline const void * +apir_decoder_use_inplace(struct apir_decoder *dec, + size_t size) +{ + if (unlikely(size > (size_t) (dec->end - dec->cur))) { + FATAL("READING TOO MUCH FROM THE DECODER :/"); + } + const void *addr = dec->cur; + dec->cur += size; + + return addr; +} + +/* + * read/write + */ + +static inline void +apir_decoder_read(struct apir_decoder *dec, + size_t size, + void *val, + size_t val_size) +{ + if (apir_decoder_peek_internal(dec, size, val, val_size)) + dec->cur += size; +} + +static inline char * +apir_encoder_write(struct apir_encoder *enc, + size_t size, + const void *val, + size_t val_size) +{ + assert(val_size <= size); + assert(size <= ((size_t) (enc->end - enc->cur))); + + char *write_addr = enc->cur; + /* we should not rely on the compiler to optimize away memcpy... 
*/ + memcpy(write_addr, val, val_size); + enc->cur += size; + + return write_addr; +} + +/* + * encode/decode + */ + +static inline void +apir_decode(struct apir_decoder *dec, size_t size, void *data, size_t data_size) +{ + assert(size % 4 == 0); + apir_decoder_read(dec, size, data, data_size); +} + +static inline void +apir_encode(struct apir_encoder *enc, size_t size, const void *data, size_t data_size) +{ + assert(size % 4 == 0); + apir_encoder_write(enc, size, data, data_size); +} + +/* + * typed encode/decode + */ + +/* uint8_t */ + +static inline void +apir_encode_uint8_t(struct apir_encoder *enc, const uint8_t *val) +{ + apir_encode(enc, sizeof(int), val, sizeof(*val)); +} + +static inline void +apir_decode_uint8_t(struct apir_decoder *dec, uint8_t *val) +{ + apir_decode(dec, sizeof(int), val, sizeof(*val)); +} + +/* uint64_t */ + +static inline void +apir_encode_uint64_t(struct apir_encoder *enc, const uint64_t *val) +{ + apir_encode(enc, 8, val, sizeof(*val)); +} + +static inline void +apir_decode_uint64_t(struct apir_decoder *dec, uint64_t *val) +{ + apir_decode(dec, 8, val, sizeof(*val)); +} + +static inline void +apir_encode_uint64_t_array(struct apir_encoder *enc, const uint64_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); +} + +static inline void +apir_decode_uint64_t_array(struct apir_decoder *dec, uint64_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, val, size); +} + +static inline const uint64_t * +apir_decode_uint64_t_array_inplace(struct apir_decoder *dec, uint32_t count) +{ + return (uint64_t *)(uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t)); +} + +/* int32_t */ + +static inline void +apir_encode_int32_t(struct apir_encoder *enc, const int32_t *val) +{ + apir_encode(enc, 4, val, sizeof(*val)); +} + +static inline void +apir_decode_int32_t(struct apir_decoder *dec, 
int32_t *val) +{ + apir_decode(dec, 4, val, sizeof(*val)); +} + +static inline void +apir_encode_int32_t_array(struct apir_encoder *enc, const int32_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); +} + +static inline void +apir_decode_int32_t_array(struct apir_decoder *dec, int32_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, val, size); +} + +/* array size (uint64_t) */ + +static inline void +apir_encode_array_size(struct apir_encoder *enc, uint64_t size) +{ + apir_encode_uint64_t(enc, &size); +} + +static inline uint64_t +apir_decode_array_size(struct apir_decoder *dec, uint64_t expected_size) +{ + uint64_t size; + apir_decode_uint64_t(dec, &size); + if (size != expected_size) { + FATAL("ENCODER IS FULL :/"); + size = 0; + } + return size; +} + +static inline uint64_t +apir_decode_array_size_unchecked(struct apir_decoder *dec) +{ + uint64_t size; + apir_decode_uint64_t(dec, &size); + return size; +} + +/* non-array pointer */ + +static inline bool +apir_encode_simple_pointer(struct apir_encoder *enc, const void *val) +{ + apir_encode_array_size(enc, val ? 
1 : 0); + return val; +} + +static inline bool +apir_decode_simple_pointer(struct apir_decoder *dec) +{ + return apir_decode_array_size_unchecked(dec); +} + +/* uint32_t */ + +static inline void +apir_encode_uint32_t(struct apir_encoder *enc, const uint32_t *val) +{ + apir_encode(enc, 4, val, sizeof(*val)); +} + +static inline void +apir_decode_uint32_t(struct apir_decoder *dec, uint32_t *val) +{ + apir_decode(dec, 4, val, sizeof(*val)); +} + +static inline void +apir_encode_uint32_t_array(struct apir_encoder *enc, const uint32_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); +} + +static inline void +apir_decode_uint32_t_array(struct apir_decoder *dec, uint32_t *val, uint32_t count) +{ + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, val, size); +} + +/* size_t */ + +static inline void +apir_encode_size_t(struct apir_encoder *enc, const size_t *val) +{ + const uint64_t tmp = *val; + apir_encode_uint64_t(enc, &tmp); +} + +static inline void +apir_decode_size_t(struct apir_decoder *dec, size_t *val) +{ + uint64_t tmp; + apir_decode_uint64_t(dec, &tmp); + *val = tmp; +} + +static inline void +apir_encode_size_t_array(struct apir_encoder *enc, const size_t *val, uint32_t count) +{ + if (sizeof(size_t) == sizeof(uint64_t)) { + apir_encode_uint64_t_array(enc, (const uint64_t *)val, count); + } else { + for (uint32_t i = 0; i < count; i++) + apir_encode_size_t(enc, &val[i]); + } +} + +static inline void +apir_decode_size_t_array(struct apir_decoder *dec, size_t *val, uint32_t count) +{ + if (sizeof(size_t) == sizeof(uint64_t)) { + apir_decode_uint64_t_array(dec, (uint64_t *)val, count); + } else { + for (uint32_t i = 0; i < count; i++) + apir_decode_size_t(dec, &val[i]); + } +} + +/* opaque blob */ + +static inline void +apir_encode_blob_array(struct apir_encoder *enc, const void *val, size_t size) +{ + apir_encode(enc, (size + 3) & ~3, val, 
size); +} + +static inline void +apir_decode_blob_array(struct apir_decoder *dec, void *val, size_t size) +{ + apir_decode(dec, (size + 3) & ~3, val, size); +} + +/* string */ + +static inline void +apir_encode_char_array(struct apir_encoder *enc, const char *val, size_t size) +{ + assert(size && strlen(val) < size); + apir_encode_blob_array(enc, val, size); +} + +static inline void +apir_decode_char_array(struct apir_decoder *dec, char *val, size_t size) +{ + apir_decode_blob_array(dec, val, size); + if (size) + val[size - 1] = '\0'; + else { + //apir_decoder_set_fatal(dec); + FATAL("Couldn't decode the blog array"); + } +} + +/* (temp) buffer allocation */ + +static inline void * +_apir_decoder_alloc_array(struct apir_decoder *dec, size_t size, size_t count) +{ + UNUSED(dec); + size_t alloc_size; + if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { + FATAL("overflow in array allocation of %zu * %zu bytes", size, count); + return NULL; + } + + return malloc(alloc_size); +} + +static inline void * +apir_decoder_alloc_array(struct apir_decoder *dec, size_t size, size_t count) +{ + struct apir_decoder *d = (struct apir_decoder *)dec; + return _apir_decoder_alloc_array(d, size, count); +} + +/* bool */ + +static inline void +apir_encode_bool_t(struct apir_encoder *enc, const bool *val) +{ + apir_encode(enc, sizeof(int), val, sizeof(bool)); +} + +static inline void +apir_decode_bool_t(struct apir_decoder *dec, bool *val) +{ + apir_decode(dec, sizeof(int), val, sizeof(bool)); +} + +/* apir_buffer_type_host_handle_t */ + +static inline void +apir_encode_apir_buffer_type_host_handle_t(struct apir_encoder *enc, const apir_buffer_type_host_handle_t *val) +{ + apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); +} + +static inline void +apir_decode_apir_buffer_type_host_handle_t(struct apir_decoder *dec, apir_buffer_type_host_handle_t *val) +{ + apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, 
sizeof(apir_buffer_type_host_handle_t)); +} + +/* apir_buffer_host_handle_t */ + +static inline void +apir_encode_apir_buffer_host_handle_t(struct apir_encoder *enc, const apir_buffer_host_handle_t *val) +{ + apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); +} + +static inline void +apir_decode_apir_buffer_host_handle_t(struct apir_decoder *dec, apir_buffer_host_handle_t *val) +{ + apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); +} + +/* uintptr_t */ + +static inline void +apir_encode_uintptr_t(struct apir_encoder *enc, const uintptr_t *val) +{ + apir_encode(enc, sizeof(*val), val, sizeof(*val)); +} + +static inline void +apir_decode_uintptr_t(struct apir_decoder *dec, uintptr_t *val) +{ + apir_decode(dec, sizeof(*val), val, sizeof(*val)); +} diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h new file mode 100644 index 00000000000..afc551d76d3 --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -0,0 +1,234 @@ +// needs the ggml-backend-impl.h definition + +#include "apir_cs.h" +#include "apir_cs_rpc.h" + +// ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer); + +static inline void +apir_encode_ggml_buffer_host_handle(struct apir_encoder *enc, const apir_buffer_host_handle_t *handle); + +static inline ggml_backend_buffer_t +apir_decode_ggml_buffer(struct apir_decoder *dec); + +/* apir_rpc_tensor */ + +static inline void +apir_encode_rcp_tensor(struct apir_encoder *enc, const apir_rpc_tensor *apir_rpc_tensor) { + size_t apir_rpc_tensor_size = sizeof(*apir_rpc_tensor); + apir_encode(enc, apir_rpc_tensor_size, apir_rpc_tensor, apir_rpc_tensor_size); +} + +static inline apir_rpc_tensor * +apir_decode_apir_rpc_tensor_inplace(struct apir_decoder *dec) { + size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor); + + return (apir_rpc_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, 
apir_rpc_tensor_size); +} + +static inline apir_rpc_tensor * +apir_decode_apir_rpc_tensor_array_inplace(struct apir_decoder *dec, uint32_t n_tensors) { + size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors; + + return (apir_rpc_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); +} + +/* ggml_tensor */ + +static inline void +apir_encode_ggml_tensor(struct apir_encoder *enc, const ggml_tensor *tensor) { + apir_rpc_tensor serialized = apir_serialize_tensor(tensor); + + apir_encode_rcp_tensor(enc, &serialized); +} + +static inline const ggml_tensor * +apir_decode_ggml_tensor(struct apir_decoder *dec) { + const apir_rpc_tensor *apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); + struct ggml_init_params params { + /*.mem_size =*/ ggml_tensor_overhead(), + /*.mem_buffer =*/ NULL, + /*.no_alloc =*/ true, + }; + struct ggml_context * ctx = ggml_init(params); + + const ggml_tensor *tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); + + return tensor; +} + +/* *** ggml_backend_buffer_type_t *** */ + +// ggml_backend_buffer_type_t is a POINTER (to a struct). +// Only the host pointer is shared between the host and guest. +// The guest stores it in `buft->context`. +// The host simply writes the pointer address in the buffer variable. 
+ + +static inline void +apir_encode_ggml_buffer_type(struct apir_encoder *enc, ggml_backend_buffer_type_t buft) { + apir_buffer_type_host_handle_t handle = ggml_buffer_type_to_apir_handle(buft); + apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); +} + +static inline ggml_backend_buffer_type_t +apir_decode_ggml_buffer_type(struct apir_decoder *dec) { + apir_buffer_type_host_handle_t handle; + + apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); + + return (ggml_backend_buffer_type_t) handle; +} + +static inline apir_buffer_type_host_handle_t +apir_decode_apir_buffer_type_host_handle(struct apir_decoder *dec) { + apir_buffer_type_host_handle_t handle; + + apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); + + return handle; +} + +/* *** ggml_backend_type_t *** */ + +// ggml_backend_buffer_t is a POINTER. +// same logic as for ggml_backend_buffer_type_t + +static inline void +apir_encode_ggml_buffer(struct apir_encoder *enc, const ggml_backend_buffer_t buffer) { + apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer); + apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); +} + +static inline ggml_backend_buffer_t +apir_decode_ggml_buffer(struct apir_decoder *dec) { + ggml_backend_buffer_t buffer; + size_t buffer_ptr_size = sizeof(buffer); + + apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size); + + return buffer; +} + +/* enum ggml_status */ + +static inline void +apir_encode_ggml_status(struct apir_encoder *enc, const enum ggml_status *status) { + apir_encoder_write(enc, sizeof(*status), status, sizeof(*status)); +} + +static inline void +apir_decode_ggml_status(struct apir_decoder *dec, enum ggml_status *status) { + apir_decoder_read(dec, sizeof(*status), status, sizeof(*status)); +} + +/* virtgpu_shmem */ + +static inline void +apir_encode_virtgpu_shmem_res_id(struct apir_encoder *enc, uint32_t shmem_res_id) { + apir_encode_uint32_t(enc, &shmem_res_id); +} + +static inline void 
+apir_decode_virtgpu_shmem_res_id(struct apir_decoder *dec, uint32_t *shmem_res_id) { + apir_decode_uint32_t(dec, shmem_res_id); +} + +/* ggml_cgraph */ + +static inline size_t +apir_serialize_ggml_cgraph(ggml_cgraph *cgraph, std::vector & cgraph_data) { + apir_serialize_graph(cgraph, cgraph_data); + + return cgraph_data.size(); +} + +static inline void +apir_encode_cgraph_data(struct apir_encoder *enc, std::vector & cgraph_data) { + size_t cgraph_size = cgraph_data.size(); + + apir_encode(enc, cgraph_size, cgraph_data.data(), cgraph_size); +} + +static inline ggml_cgraph * +apir_decode_ggml_cgraph(struct apir_decoder *dec, size_t cgraph_size) { + UNUSED(cgraph_size); + + uint32_t n_nodes; + apir_decode_uint32_t(dec, &n_nodes); + const uint64_t * nodes = apir_decode_uint64_t_array_inplace(dec, n_nodes); + + uint32_t n_tensors; + apir_decode_uint32_t(dec, &n_tensors); + const apir_rpc_tensor *tensors = apir_decode_apir_rpc_tensor_array_inplace(dec, n_tensors); + + return apir_deserialize_graph(n_nodes, n_tensors, tensors, nodes); +} + +static inline void +apir_encode_ggml_buffer_handle(struct apir_encoder *enc, const apir_buffer_host_handle_t *handle) { + apir_encoder_write(enc, sizeof(*handle), &handle, sizeof(*handle)); +} + +static inline void +apir_encode_ggml_tensor_inline(struct apir_encoder *enc, const ggml_tensor *tensor) { + size_t tensor_size = sizeof(*tensor); + + if (tensor->extra) { + FATAL("Cannot pass tensors with extra"); + } + + if (tensor->src[0] && tensor->buffer) { + static int first = 1; + if (first) { + WARNING("Cannot pass tensors with src and buffer"); + first = 0; + } + } + + apir_encoder_write(enc, tensor_size, tensor, tensor_size); + + // tensor->data is a pointer inside the device buffer. No need to touch it + // tensor->buffer is a pointer to a buffer. Encoding the buffer handle in sequence. + // (could also make a copy of the tensor, and update locally.) 
+ + if (tensor->buffer) { + apir_buffer_host_handle_t buffer_handle = ggml_buffer_to_apir_handle(tensor->buffer); + apir_encode_ggml_buffer_handle(enc, &buffer_handle); + } + + if (tensor->view_src) { + apir_encoder_write(enc, tensor_size, tensor->view_src, tensor_size); + } + + for (int i = 0; tensor->src[i]; i++) { + const ggml_tensor *tensor_src = tensor->src[i]; + apir_encoder_write(enc, tensor_size, tensor_src, tensor_size); + } +} + +static inline const ggml_tensor * +apir_decode_ggml_tensor_inplace(struct apir_decoder *dec) { + + // it safe to remove the `const` qualifier here, we *do* want to + // modify the shared memory data to fix the `src` pointers. + ggml_tensor *tensor = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + + // tensor->data is a pointer inside the device buffer. No need to touch it + // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence. + if (tensor->buffer) { + tensor->buffer = apir_decode_ggml_buffer(dec); + } + + if (tensor->view_src) { + ggml_tensor *tensor_view_src = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + tensor->view_src = tensor_view_src; + } + + for (int i = 0; tensor->src[i]; i++) { + ggml_tensor *tensor_src = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + tensor->src[i] = tensor_src; // overwrite op->src[i] pointer with the actual location of the src tensor + } + + return tensor; +} diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h new file mode 100644 index 00000000000..a92b28317cd --- /dev/null +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h @@ -0,0 +1,45 @@ +#include +#include +#include + +// ggml_tensor is serialized into apir_rpc_tensor +struct apir_rpc_tensor { + uint64_t id; + uint32_t type; + uint64_t buffer; + uint32_t ne[GGML_MAX_DIMS]; + uint32_t nb[GGML_MAX_DIMS]; + uint32_t op; + int32_t 
op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; + int32_t flags; + uint64_t src[GGML_MAX_SRC]; + uint64_t view_src; + uint64_t view_offs; + uint64_t data; + char name[GGML_MAX_NAME]; + + char padding[4]; +}; + +/* frontend */ + +apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor); + +void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector & output); + +/* backend */ + +void apir_track_backend_buffer(ggml_backend_buffer_t buffer); +bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer); +std::unordered_set apir_get_track_backend_buffers(); + +void apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, std::unordered_set & visited); + +ggml_tensor *apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor); + +ggml_tensor *apir_create_node(uint64_t id, + struct ggml_context * ctx, + const std::unordered_map & tensor_ptrs, + std::unordered_map & tensor_map); + +ggml_cgraph *apir_deserialize_graph(uint32_t n_nodes, uint32_t n_tensors, const apir_rpc_tensor * tensors, const uint64_t * nodes); From 61715cd01cd2385099e65a2fbcf6b99ed5e55453 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 9 Jan 2026 14:09:01 +0100 Subject: [PATCH 04/37] ggml: disable Vulkan backend loading with GGML_DISABLE_VULKAN This flag allows disabling the ggml-vulkan backend at runtime. This is necessary for the API Remoting support, as the API Remoting frontend (`ggml-remotingfrontend`) relies on the same device file as `ggml-vulkan`, when running inside a Virtual Machine. This runtime disable flag allows enabling the compilation of both `ggml-vulkan` and `ggml-remotingfrontend`, while selecting at runtime which one should be activated. 
--- ggml/src/ggml-backend-reg.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index e4d2004c123..087a228ece2 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -200,7 +200,12 @@ struct ggml_backend_registry { register_backend(ggml_backend_sycl_reg()); #endif #ifdef GGML_USE_VULKAN + // Add runtime disable check + if (getenv("GGML_DISABLE_VULKAN") == nullptr) { register_backend(ggml_backend_vk_reg()); + } else { + GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n"); + } #endif #ifdef GGML_USE_WEBGPU register_backend(ggml_backend_webgpu_reg()); From 0464ca39e8a2845d9a7a2691a341abd5951e00ed Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 09:00:24 +0100 Subject: [PATCH 05/37] CMakePresets.json: don't expose presets for the API Remoting backends --- CMakePresets.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/CMakePresets.json b/CMakePresets.json index 77c654089ab..b5afeb3c0f2 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -30,8 +30,6 @@ { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, - { "name": "remoting_frontend", "hidden": true, "cacheVariables": { "GGML_REMOTING_FRONTEND": "ON" } }, - { "name": "remoting_backend", "hidden": true, "cacheVariables": { "GGML_REMOTING_BACKEND": "ON" } }, { "name": "x64-windows-llvm", "hidden": true, From 350e94c8e51ac866e2c9c3c5838578aa33657191 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 09:01:10 +0100 Subject: [PATCH 06/37] backend-utils.cpp: remove unused file --- ggml/src/ggml-remotingbackend/CMakeLists.txt | 1 - ggml/src/ggml-remotingbackend/backend-utils.cpp | 0 2 files changed, 1 deletion(-) delete mode 100644 
ggml/src/ggml-remotingbackend/backend-utils.cpp diff --git a/ggml/src/ggml-remotingbackend/CMakeLists.txt b/ggml/src/ggml-remotingbackend/CMakeLists.txt index c96d5332dda..d01ec8b1b0f 100644 --- a/ggml/src/ggml-remotingbackend/CMakeLists.txt +++ b/ggml/src/ggml-remotingbackend/CMakeLists.txt @@ -10,7 +10,6 @@ ggml_add_backend_library(ggml-remotingbackend backend-dispatched-device.cpp backend-dispatched-buffer.cpp backend-dispatched-buffer-type.cpp - backend-utils.cpp shared/api_remoting.h shared/apir_backend.h shared/apir_cs.h diff --git a/ggml/src/ggml-remotingbackend/backend-utils.cpp b/ggml/src/ggml-remotingbackend/backend-utils.cpp deleted file mode 100644 index e69de29bb2d..00000000000 From dd518effaab92e9da6c229abb764725db8120693 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 09:43:47 +0100 Subject: [PATCH 07/37] Update the indentation with clang-format And fix the include order afterwards ... --- .../apir_cs_ggml-rpc-back.cpp | 75 ++- .../ggml-remotingbackend/backend-convert.h | 14 +- .../backend-dispatched-backend.cpp | 98 ++-- .../backend-dispatched-buffer-type.cpp | 42 +- .../backend-dispatched-buffer.cpp | 199 ++++---- .../backend-dispatched-device.cpp | 63 ++- .../backend-dispatched.cpp | 53 ++- .../backend-dispatched.gen.h | 228 +++++---- .../ggml-remotingbackend/backend-dispatched.h | 7 +- .../ggml-remotingbackend/backend-internal.h | 28 +- ggml/src/ggml-remotingbackend/backend-utils.h | 85 ++-- ggml/src/ggml-remotingbackend/backend.cpp | 122 ++--- .../shared/api_remoting.h | 90 ++-- .../shared/apir_backend.gen.h | 58 +-- .../shared/apir_backend.h | 97 ++-- .../src/ggml-remotingbackend/shared/apir_cs.h | 447 +++++++----------- .../shared/apir_cs_ggml.h | 104 ++-- .../ggml-remotingbackend/shared/apir_cs_rpc.h | 61 ++- .../apir_cs_ggml-rpc-front.cpp | 48 +- .../ggml-backend-buffer-type.cpp | 53 +-- .../ggml-backend-buffer.cpp | 71 +-- .../ggml-backend-device.cpp | 88 ++-- .../ggml-backend-reg.cpp | 36 +- 
.../ggml-remotingfrontend/ggml-backend.cpp | 72 +-- .../ggml-remoting-frontend.cpp | 26 +- .../src/ggml-remotingfrontend/ggml-remoting.h | 67 ++- ggml/src/ggml-remotingfrontend/virtgpu-apir.h | 14 +- .../virtgpu-forward-backend.cpp | 23 +- .../virtgpu-forward-buffer-type.cpp | 153 +++--- .../virtgpu-forward-buffer.cpp | 79 ++-- .../virtgpu-forward-device.cpp | 87 ++-- .../virtgpu-forward-impl.h | 44 +- .../virtgpu-forward.gen.h | 65 ++- .../src/ggml-remotingfrontend/virtgpu-shm.cpp | 150 +++--- ggml/src/ggml-remotingfrontend/virtgpu-shm.h | 19 +- .../ggml-remotingfrontend/virtgpu-utils.cpp | 281 ++++++----- .../src/ggml-remotingfrontend/virtgpu-utils.h | 143 +++--- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 265 +++++------ ggml/src/ggml-remotingfrontend/virtgpu.h | 88 ++-- 39 files changed, 1818 insertions(+), 1925 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp index f4de35564a8..7d1088f2526 100644 --- a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp +++ b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp @@ -1,21 +1,19 @@ -#include -#include -#include -#include - -#include "ggml-impl.h" #include "ggml-backend-impl.h" +#include "ggml-impl.h" #include "shared/apir_cs_rpc.h" +#include +#include +#include +#include + std::unordered_set backend_buffers; -void -apir_track_backend_buffer(ggml_backend_buffer_t buffer) { +void apir_track_backend_buffer(ggml_backend_buffer_t buffer) { backend_buffers.insert(buffer); } -bool -apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) { +bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) { auto it = backend_buffers.find(buffer); if (it == backend_buffers.end()) { return false; @@ -25,35 +23,33 @@ apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) { return true; } -std::unordered_set -apir_get_track_backend_buffers() { +std::unordered_set apir_get_track_backend_buffers() { return backend_buffers; } 
-ggml_tensor * -apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor) { - ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, - tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); +ggml_tensor * apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor) { + ggml_tensor * result = + ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) { result->nb[i] = tensor->nb[i]; } result->buffer = reinterpret_cast(tensor->buffer); if (result->buffer && backend_buffers.find(result->buffer) == backend_buffers.end()) { - printf("WARNING: HOST BUFFER NOT FOUND | %p\n", (void *)result->buffer); + printf("WARNING: HOST BUFFER NOT FOUND | %p\n", (void *) result->buffer); result->buffer = nullptr; } uint64_t tensor_data = tensor->data; if (result->buffer) { // require that the tensor data does not go beyond the buffer end - uint64_t tensor_size = (uint64_t) ggml_nbytes(result); + uint64_t tensor_size = (uint64_t) ggml_nbytes(result); uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer); - uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer); + uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer); // tensor->data is serialized as an offset to the buffer base address tensor_data += buffer_start; - GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow + GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow GGML_ASSERT(tensor_data >= buffer_start && tensor_data + tensor_size <= buffer_start + buffer_size); } @@ -62,16 +58,15 @@ apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tenso result->op_params[i] = tensor->op_params[i]; } result->flags = tensor->flags; - result->data = reinterpret_cast(tensor_data); + result->data = reinterpret_cast(tensor_data); 
ggml_set_name(result, tensor->name); return result; } -ggml_tensor * -apir_create_node(uint64_t id, - struct ggml_context * ctx, - const std::unordered_map & tensor_ptrs, - std::unordered_map & tensor_map) { +ggml_tensor * apir_create_node(uint64_t id, + struct ggml_context * ctx, + const std::unordered_map & tensor_ptrs, + std::unordered_map & tensor_map) { if (id == 0) { return nullptr; } @@ -79,7 +74,7 @@ apir_create_node(uint64_t id, return tensor_map[id]; } const apir_rpc_tensor * tensor = tensor_ptrs.at(id); - struct ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); + struct ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); if (result == nullptr) { return nullptr; } @@ -87,27 +82,29 @@ apir_create_node(uint64_t id, for (int i = 0; i < GGML_MAX_SRC; i++) { result->src[i] = apir_create_node(tensor->src[i], ctx, tensor_ptrs, tensor_map); } - result->view_src = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); + result->view_src = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map); result->view_offs = tensor->view_offs; return result; } -ggml_cgraph * -apir_deserialize_graph(uint32_t n_nodes, uint32_t n_tensors, const apir_rpc_tensor * tensors, const uint64_t * nodes) { - size_t buf_size = ggml_tensor_overhead()*(n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); +ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes, + uint32_t n_tensors, + const apir_rpc_tensor * tensors, + const uint64_t * nodes) { + size_t buf_size = ggml_tensor_overhead() * (n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); struct ggml_init_params params = { - /*.mem_size =*/ buf_size, - /*.mem_buffer =*/ NULL, - /*.no_alloc =*/ true, + /*.mem_size =*/buf_size, + /*.mem_buffer =*/NULL, + /*.no_alloc =*/true, }; - struct ggml_context * ctx = ggml_init(params); - struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); - graph->n_nodes = n_nodes; - std::unordered_map tensor_ptrs; + struct 
ggml_context * ctx = ggml_init(params); + struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); + graph->n_nodes = n_nodes; + std::unordered_map tensor_ptrs; for (uint32_t i = 0; i < n_tensors; i++) { tensor_ptrs[tensors[i].id] = &tensors[i]; } - std::unordered_map tensor_map; + std::unordered_map tensor_map; for (uint32_t i = 0; i < n_nodes; i++) { int64_t id; memcpy(&id, &nodes[i], sizeof(id)); diff --git a/ggml/src/ggml-remotingbackend/backend-convert.h b/ggml/src/ggml-remotingbackend/backend-convert.h index b45c2784160..1978d21f7ef 100644 --- a/ggml/src/ggml-remotingbackend/backend-convert.h +++ b/ggml/src/ggml-remotingbackend/backend-convert.h @@ -2,14 +2,12 @@ #define BUFFER_TO_HOST_HANDLE(name) ggml_buffer_to_apir_handle(name) -static inline apir_buffer_host_handle_t -ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { - // in the backend, the buffer handle is the buffer pointer - return (apir_buffer_host_handle_t) buffer; +static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { + // in the backend, the buffer handle is the buffer pointer + return (apir_buffer_host_handle_t) buffer; } -static inline apir_buffer_type_host_handle_t -ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) { - // in the backend, the buffer handle is the buffer pointer - return (apir_buffer_type_host_handle_t) buft; +static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) { + // in the backend, the buffer handle is the buffer pointer + return (apir_buffer_type_host_handle_t) buft; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index ca9cd7a5a2e..22d2920e110 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -1,72 +1,72 @@ -#include -#include "backend-internal.h" 
#include "backend-dispatched.h" - -#include "ggml-impl.h" +#include "backend-internal.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" - +#include "ggml-impl.h" #include "shared/apir_backend.h" -struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"}; +#include -uint32_t -backend_backend_graph_compute(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - UNUSED(enc); +struct timer_data graph_compute_timer = { 0, 0, 0, "compute_timer" }; - static bool async_backend_initialized = false; - static bool async_backend; +uint32_t backend_backend_graph_compute(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); + UNUSED(enc); - if (!async_backend_initialized) { - struct ggml_backend_dev_props props; + static bool async_backend_initialized = false; + static bool async_backend; - dev->iface.get_props(dev, &props); - async_backend = props.caps.async; - async_backend_initialized = true; - } + if (!async_backend_initialized) { + struct ggml_backend_dev_props props; + + dev->iface.get_props(dev, &props); + async_backend = props.caps.async; + async_backend_initialized = true; + } - start_timer(&graph_compute_timer); + start_timer(&graph_compute_timer); - uint32_t shmem_res_id; - apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - const void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); - if (!shmem_data) { - FATAL("Couldn't get the shmem addr from virgl :/"); - } - size_t cgraph_size; - apir_decode_size_t(dec, &cgraph_size); + const void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + if (!shmem_data) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } + size_t cgraph_size; + apir_decode_size_t(dec, &cgraph_size); - struct apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); + 
struct apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); - ggml_cgraph *cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); + ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); - ggml_status status; + ggml_status status; #if APIR_BACKEND_CHECK_SUPPORTS_OP == 1 - for (int idx = 0; idx < cgraph->n_nodes; idx++) { - ggml_tensor *op = ggml_graph_node(cgraph, idx); - if (dev->iface.supports_op(dev, op)) { - continue; + for (int idx = 0; idx < cgraph->n_nodes; idx++) { + ggml_tensor * op = ggml_graph_node(cgraph, idx); + if (dev->iface.supports_op(dev, op)) { + continue; + } + ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); + + status = GGML_STATUS_ABORTED; + apir_encode_ggml_status(enc, &status); + + stop_timer(&graph_compute_timer); + return 0; } - ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); - - status = GGML_STATUS_ABORTED; - apir_encode_ggml_status(enc, &status); - - stop_timer(&graph_compute_timer); - return 0; - } #endif - status = bck->iface.graph_compute(bck, cgraph); + status = bck->iface.graph_compute(bck, cgraph); - if (async_backend) { - bck->iface.synchronize(bck); - } + if (async_backend) { + bck->iface.synchronize(bck); + } - apir_encode_ggml_status(enc, &status); + apir_encode_ggml_status(enc, &status); - stop_timer(&graph_compute_timer); + stop_timer(&graph_compute_timer); - return 0; + return 0; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp index 4fba10ed623..e05f4f87f50 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp @@ -1,18 +1,19 @@ -#include -#include "backend-internal.h" #include "backend-dispatched.h" - -#include "ggml-impl.h" +#include "backend-internal.h" #include "ggml-backend-impl.h" #include 
"ggml-backend.h" +#include "ggml-impl.h" + +#include -uint32_t -backend_buffer_type_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_get_name(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); - const char *string = buft->iface.get_name(buft); + const char * string = buft->iface.get_name(buft); const size_t string_size = strlen(string) + 1; apir_encode_array_size(enc, string_size); @@ -21,8 +22,9 @@ backend_buffer_type_get_name(struct apir_encoder *enc, struct apir_decoder *dec, return 0; } -uint32_t -backend_buffer_type_get_alignment(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_get_alignment(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -33,8 +35,9 @@ backend_buffer_type_get_alignment(struct apir_encoder *enc, struct apir_decoder return 0; } -uint32_t -backend_buffer_type_get_max_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_get_max_size(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -45,8 +48,9 @@ backend_buffer_type_get_max_size(struct apir_encoder *enc, struct apir_decoder * return 0; } -uint32_t -backend_buffer_type_is_host(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_is_host(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -57,8 +61,9 @@ 
backend_buffer_type_is_host(struct apir_encoder *enc, struct apir_decoder *dec, return 0; } -uint32_t -backend_buffer_type_alloc_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; @@ -80,13 +85,14 @@ backend_buffer_type_alloc_buffer(struct apir_encoder *enc, struct apir_decoder * return 0; } -uint32_t -backend_buffer_type_get_alloc_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); - const ggml_tensor *op = apir_decode_ggml_tensor_inplace(dec); + const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); size_t value = buft->iface.get_alloc_size(buft, op); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index 0228241bf1a..a04ef0f1c55 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -1,55 +1,57 @@ -#include -#include "backend-internal.h" #include "backend-dispatched.h" - -#include "ggml-impl.h" +#include "backend-internal.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" +#include "ggml-impl.h" -struct timer_data get_tensor_timer = {0, 0, 0, "get_tensor"}; -struct timer_data set_tensor_timer = {0, 0, 0, "set_tensor"}; -struct timer_data cpy_tensor_timer = {0, 0, 0, "set_tensor"}; +#include -uint32_t -backend_buffer_get_base(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); 
+struct timer_data get_tensor_timer = { 0, 0, 0, "get_tensor" }; +struct timer_data set_tensor_timer = { 0, 0, 0, "set_tensor" }; +struct timer_data cpy_tensor_timer = { 0, 0, 0, "set_tensor" }; - uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer); - apir_encode_uintptr_t(enc, &base); +uint32_t backend_buffer_get_base(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); - return 0; + uintptr_t base = (uintptr_t) buffer->iface.get_base(buffer); + apir_encode_uintptr_t(enc, &base); + + return 0; } -uint32_t -backend_buffer_set_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - UNUSED(enc); +uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); + UNUSED(enc); - start_timer(&set_tensor_timer); + start_timer(&set_tensor_timer); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); - ggml_tensor *tensor; - // safe to remove the const qualifier here - tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); + ggml_tensor * tensor; + // safe to remove the const qualifier here + tensor = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); - uint32_t shmem_res_id; - apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - size_t offset; - apir_decode_size_t(dec, &offset); + size_t offset; + apir_decode_size_t(dec, &offset); - size_t size; - apir_decode_size_t(dec, &size); + size_t size; + apir_decode_size_t(dec, &size); - void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); - if (!shmem_data) { - FATAL("Couldn't 
get the shmem addr from virgl :/"); - } + if (!shmem_data) { + FATAL("Couldn't get the shmem addr from virgl :/"); + } #if 0 INFO("Calling (%p)->set_tensor(tensor=%p, data=%p, offset=%lu, size=%lu", @@ -64,102 +66,103 @@ backend_buffer_set_tensor(struct apir_encoder *enc, struct apir_decoder *dec, st INFO("\n"); #endif - buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size); + buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size); - stop_timer(&set_tensor_timer); + stop_timer(&set_tensor_timer); - return 0; + return 0; } -uint32_t -backend_buffer_get_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - UNUSED(enc); +uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); + UNUSED(enc); - start_timer(&get_tensor_timer); + start_timer(&get_tensor_timer); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + const ggml_tensor * tensor; + // safe to remove the const qualifier here + tensor = apir_decode_ggml_tensor(dec); - const ggml_tensor *tensor; - // safe to remove the const qualifier here - tensor = apir_decode_ggml_tensor(dec); + uint32_t shmem_res_id; + apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - uint32_t shmem_res_id; - apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); + size_t offset; + apir_decode_size_t(dec, &offset); - size_t offset; - apir_decode_size_t(dec, &offset); + size_t size; + apir_decode_size_t(dec, &size); - size_t size; - apir_decode_size_t(dec, &size); - - void *shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - FATAL("Couldn't get the shmem addr from virgl :/"); - } + FATAL("Couldn't get the shmem addr from virgl :/"); + } - 
buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size); + buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size); - stop_timer(&get_tensor_timer); + stop_timer(&get_tensor_timer); - return 0; + return 0; } -uint32_t -backend_buffer_cpy_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); +uint32_t backend_buffer_cpy_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); - start_timer(&cpy_tensor_timer); + start_timer(&cpy_tensor_timer); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); - INFO("%s <---->", __func__); - const ggml_tensor *src; - // safe to remove the const qualifier here - src = apir_decode_ggml_tensor(dec); - ggml_tensor* dst = (ggml_tensor*)(uintptr_t) apir_decode_ggml_tensor(dec); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); + INFO("%s <---->", __func__); + const ggml_tensor * src; + // safe to remove the const qualifier here + src = apir_decode_ggml_tensor(dec); + ggml_tensor * dst = (ggml_tensor *) (uintptr_t) apir_decode_ggml_tensor(dec); - bool ret = buffer->iface.cpy_tensor(buffer, src, (ggml_tensor*)dst); + bool ret = buffer->iface.cpy_tensor(buffer, src, (ggml_tensor *) dst); - apir_encode_bool_t(enc, &ret); + apir_encode_bool_t(enc, &ret); - stop_timer(&cpy_tensor_timer); + stop_timer(&cpy_tensor_timer); - return 0; + return 0; } -uint32_t -backend_buffer_clear(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - UNUSED(enc); +uint32_t backend_buffer_clear(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx) { + UNUSED(ctx); + UNUSED(enc); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); - uint8_t value; - apir_decode_uint8_t(dec, &value); + uint8_t value; + 
apir_decode_uint8_t(dec, &value); - buffer->iface.clear(buffer, value); + buffer->iface.clear(buffer, value); - return 0; + return 0; } -uint32_t -backend_buffer_free_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { - UNUSED(ctx); - UNUSED(enc); +uint32_t backend_buffer_free_buffer(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { + UNUSED(ctx); + UNUSED(enc); - ggml_backend_buffer_t buffer; - buffer = apir_decode_ggml_buffer(dec); + ggml_backend_buffer_t buffer; + buffer = apir_decode_ggml_buffer(dec); - if (!apir_untrack_backend_buffer(buffer)) { - WARNING("%s: unknown buffer %p", (void *) buffer); - return 1; - } + if (!apir_untrack_backend_buffer(buffer)) { + WARNING("%s: unknown buffer %p", (void *) buffer); + return 1; + } - buffer->iface.free_buffer(buffer); + buffer->iface.free_buffer(buffer); - return 0; + return 0; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index b507832ce16..03ff8ad0558 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -1,12 +1,14 @@ -#include -#include "backend-internal.h" #include "backend-dispatched.h" - -#include "ggml-impl.h" +#include "backend-internal.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" +#include "ggml-impl.h" + +#include -uint32_t backend_device_get_device_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_device_count(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -17,7 +19,9 @@ uint32_t backend_device_get_device_count(struct apir_encoder *enc, struct apir_d return 0; } -uint32_t backend_device_get_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context 
*ctx) { +uint32_t backend_device_get_count(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -28,11 +32,13 @@ uint32_t backend_device_get_count(struct apir_encoder *enc, struct apir_decoder return 0; } -uint32_t backend_device_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_name(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); - const char *string = dev->iface.get_name(dev); + const char * string = dev->iface.get_name(dev); const size_t string_size = strlen(string) + 1; apir_encode_array_size(enc, string_size); @@ -41,12 +47,13 @@ uint32_t backend_device_get_name(struct apir_encoder *enc, struct apir_decoder * return 0; } -uint32_t -backend_device_get_description(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_description(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); - const char *string = dev->iface.get_description(dev); + const char * string = dev->iface.get_description(dev); const size_t string_size = strlen(string) + 1; apir_encode_array_size(enc, string_size); @@ -55,8 +62,9 @@ backend_device_get_description(struct apir_encoder *enc, struct apir_decoder *de return 0; } -uint32_t -backend_device_get_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_type(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -66,8 +74,9 @@ backend_device_get_type(struct apir_encoder *enc, struct apir_decoder *dec, stru return 0; } -uint32_t -backend_device_get_memory(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t 
backend_device_get_memory(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -80,11 +89,12 @@ backend_device_get_memory(struct apir_encoder *enc, struct apir_decoder *dec, st return 0; } -uint32_t -backend_device_supports_op(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_supports_op(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); - const ggml_tensor *op = apir_decode_ggml_tensor_inplace(dec); + const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); bool supports_op = dev->iface.supports_op(dev, op); @@ -93,8 +103,9 @@ backend_device_supports_op(struct apir_encoder *enc, struct apir_decoder *dec, s return 0; } -uint32_t -backend_device_get_buffer_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_buffer_type(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -105,8 +116,9 @@ backend_device_get_buffer_type(struct apir_encoder *enc, struct apir_decoder *de return 0; } -uint32_t -backend_device_get_props(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_get_props(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -121,15 +133,16 @@ backend_device_get_props(struct apir_encoder *enc, struct apir_decoder *dec, str return 0; } -uint32_t -backend_device_buffer_from_ptr(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx) { +uint32_t backend_device_buffer_from_ptr(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); uint32_t shmem_res_id; apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - void *shmem_ptr = 
ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_ptr) { FATAL("Couldn't get the shmem addr from virgl :/"); } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index fca41e8a7d9..b75a5d2ad7b 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -1,45 +1,46 @@ -#include -#include "backend-internal.h" #include "backend-dispatched.h" -#include "ggml-impl.h" +#include "backend-internal.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" +#include "ggml-impl.h" + +#include ggml_backend_reg_t reg = NULL; ggml_backend_dev_t dev = NULL; -ggml_backend_t bck = NULL; +ggml_backend_t bck = NULL; long long timer_start = 0; long long timer_total = 0; long long timer_count = 0; -uint32_t backend_dispatch_initialize(void *ggml_backend_reg_fct_p, void *ggml_backend_init_fct_p) { - if (reg != NULL) { - FATAL("%s: already initialized :/", __func__); - } - ggml_backend_reg_t (* ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; +uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { + if (reg != NULL) { + FATAL("%s: already initialized :/", __func__); + } + ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; - reg = ggml_backend_reg_fct(); - if (reg == NULL) { - FATAL("%s: backend registration failed :/", __func__); - } + reg = ggml_backend_reg_fct(); + if (reg == NULL) { + FATAL("%s: backend registration failed :/", __func__); + } - if (reg->iface.get_device_count(reg)) { - dev = reg->iface.get_device(reg, 0); - } + if (reg->iface.get_device_count(reg)) { + dev = reg->iface.get_device(reg, 0); + } - ggml_backend_t (* ggml_backend_fct)(int) = (ggml_backend_t (*)(int)) ggml_backend_init_fct_p; + ggml_backend_t 
(*ggml_backend_fct)(int) = (ggml_backend_t (*)(int)) ggml_backend_init_fct_p; - bck = ggml_backend_fct(0); - if (!bck) { - ERROR("%s: backend initialization failed :/", __func__); - return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; - } + bck = ggml_backend_fct(0); + if (!bck) { + ERROR("%s: backend initialization failed :/", __func__); + return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; + } - size_t free, total; - dev->iface.get_memory(dev, &free, &total); - INFO("%s: free memory: %ld MB", __func__, (size_t) free/1024/1024); + size_t free, total; + dev->iface.get_memory(dev, &free, &total); + INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); - return APIR_BACKEND_INITIALIZE_SUCCESS; + return APIR_BACKEND_INITIALIZE_SUCCESS; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h index 2160cca9f65..da82846db2a 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h @@ -1,108 +1,168 @@ #pragma once - /* device */ -uint32_t backend_device_get_device_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_count(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_description(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_type(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_memory(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_supports_op(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_buffer_type(struct apir_encoder 
*enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_get_props(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_device_buffer_from_ptr(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_device_get_device_count(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_get_count(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_get_name(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); +uint32_t backend_device_get_description(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_get_type(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); +uint32_t backend_device_get_memory(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_supports_op(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_get_buffer_type(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_get_props(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_device_buffer_from_ptr(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); /* buffer-type */ -uint32_t backend_buffer_type_get_name(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_type_get_alignment(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_type_get_max_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); 
-uint32_t backend_buffer_type_is_host(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_type_get_name(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alignment(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_max_size(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_is_host(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); /* buffer */ -uint32_t backend_buffer_get_base(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_set_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_get_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_cpy_tensor(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_clear(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); -uint32_t backend_buffer_free_buffer(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_buffer_get_base(struct apir_encoder * enc, struct apir_decoder 
* dec, struct virgl_apir_context * ctx); +uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_cpy_tensor(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); +uint32_t backend_buffer_clear(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); +uint32_t backend_buffer_free_buffer(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); /* backend */ -uint32_t backend_backend_graph_compute(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); +uint32_t backend_backend_graph_compute(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); -static inline const char *backend_dispatch_command_name(ApirBackendCommandType type) -{ - switch (type) { - /* device */ - case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT: return "backend_device_get_device_count"; - case APIR_COMMAND_TYPE_DEVICE_GET_COUNT: return "backend_device_get_count"; - case APIR_COMMAND_TYPE_DEVICE_GET_NAME: return "backend_device_get_name"; - case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION: return "backend_device_get_description"; - case APIR_COMMAND_TYPE_DEVICE_GET_TYPE: return "backend_device_get_type"; - case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY: return "backend_device_get_memory"; - case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP: return "backend_device_supports_op"; - case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE: return "backend_device_get_buffer_type"; - case APIR_COMMAND_TYPE_DEVICE_GET_PROPS: return "backend_device_get_props"; - case APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR: return "backend_device_buffer_from_ptr"; - /* buffer-type */ - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME: return 
"backend_buffer_type_get_name"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT: return "backend_buffer_type_get_alignment"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: return "backend_buffer_type_get_max_size"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: return "backend_buffer_type_is_host"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: return "backend_buffer_type_alloc_buffer"; - case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: return "backend_buffer_type_get_alloc_size"; - /* buffer */ - case APIR_COMMAND_TYPE_BUFFER_GET_BASE: return "backend_buffer_get_base"; - case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR: return "backend_buffer_set_tensor"; - case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR: return "backend_buffer_get_tensor"; - case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR: return "backend_buffer_cpy_tensor"; - case APIR_COMMAND_TYPE_BUFFER_CLEAR: return "backend_buffer_clear"; - case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER: return "backend_buffer_free_buffer"; - /* backend */ - case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE: return "backend_backend_graph_compute"; +static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) { + switch (type) { + /* device */ + case APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT: + return "backend_device_get_device_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_COUNT: + return "backend_device_get_count"; + case APIR_COMMAND_TYPE_DEVICE_GET_NAME: + return "backend_device_get_name"; + case APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION: + return "backend_device_get_description"; + case APIR_COMMAND_TYPE_DEVICE_GET_TYPE: + return "backend_device_get_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_MEMORY: + return "backend_device_get_memory"; + case APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP: + return "backend_device_supports_op"; + case APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE: + return "backend_device_get_buffer_type"; + case APIR_COMMAND_TYPE_DEVICE_GET_PROPS: + return "backend_device_get_props"; + case 
APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR: + return "backend_device_buffer_from_ptr"; + /* buffer-type */ + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME: + return "backend_buffer_type_get_name"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT: + return "backend_buffer_type_get_alignment"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE: + return "backend_buffer_type_get_max_size"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST: + return "backend_buffer_type_is_host"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER: + return "backend_buffer_type_alloc_buffer"; + case APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE: + return "backend_buffer_type_get_alloc_size"; + /* buffer */ + case APIR_COMMAND_TYPE_BUFFER_GET_BASE: + return "backend_buffer_get_base"; + case APIR_COMMAND_TYPE_BUFFER_SET_TENSOR: + return "backend_buffer_set_tensor"; + case APIR_COMMAND_TYPE_BUFFER_GET_TENSOR: + return "backend_buffer_get_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR: + return "backend_buffer_cpy_tensor"; + case APIR_COMMAND_TYPE_BUFFER_CLEAR: + return "backend_buffer_clear"; + case APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER: + return "backend_buffer_free_buffer"; + /* backend */ + case APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE: + return "backend_backend_graph_compute"; - default: return "unknown"; - } + default: + return "unknown"; + } } extern "C" { static const backend_dispatch_t apir_backend_dispatch_table[APIR_BACKEND_DISPATCH_TABLE_COUNT] = { - - /* device */ - /* APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = */ backend_device_get_device_count, - /* APIR_COMMAND_TYPE_DEVICE_GET_COUNT = */ backend_device_get_count, - /* APIR_COMMAND_TYPE_DEVICE_GET_NAME = */ backend_device_get_name, - /* APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = */ backend_device_get_description, - /* APIR_COMMAND_TYPE_DEVICE_GET_TYPE = */ backend_device_get_type, - /* APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = */ backend_device_get_memory, - /* APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = */ backend_device_supports_op, - /* 
APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = */ backend_device_get_buffer_type, - /* APIR_COMMAND_TYPE_DEVICE_GET_PROPS = */ backend_device_get_props, - /* APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = */ backend_device_buffer_from_ptr, + /* device */ + + /* APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = */ backend_device_get_device_count, + /* APIR_COMMAND_TYPE_DEVICE_GET_COUNT = */ backend_device_get_count, + /* APIR_COMMAND_TYPE_DEVICE_GET_NAME = */ backend_device_get_name, + /* APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = */ backend_device_get_description, + /* APIR_COMMAND_TYPE_DEVICE_GET_TYPE = */ backend_device_get_type, + /* APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = */ backend_device_get_memory, + /* APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = */ backend_device_supports_op, + /* APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = */ backend_device_get_buffer_type, + /* APIR_COMMAND_TYPE_DEVICE_GET_PROPS = */ backend_device_get_props, + /* APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = */ backend_device_buffer_from_ptr, - /* buffer-type */ + /* buffer-type */ - /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer, - /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = */ backend_buffer_type_get_name, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = */ backend_buffer_type_get_alignment, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = */ backend_buffer_type_get_max_size, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = */ backend_buffer_type_is_host, + /* APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = */ backend_buffer_type_alloc_buffer, + /* 
APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = */ backend_buffer_type_get_alloc_size, - /* buffer */ + /* buffer */ - /* APIR_COMMAND_TYPE_BUFFER_GET_BASE = */ backend_buffer_get_base, - /* APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = */ backend_buffer_set_tensor, - /* APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = */ backend_buffer_get_tensor, - /* APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = */ backend_buffer_cpy_tensor, - /* APIR_COMMAND_TYPE_BUFFER_CLEAR = */ backend_buffer_clear, - /* APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = */ backend_buffer_free_buffer, + /* APIR_COMMAND_TYPE_BUFFER_GET_BASE = */ backend_buffer_get_base, + /* APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = */ backend_buffer_set_tensor, + /* APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = */ backend_buffer_get_tensor, + /* APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = */ backend_buffer_cpy_tensor, + /* APIR_COMMAND_TYPE_BUFFER_CLEAR = */ backend_buffer_clear, + /* APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = */ backend_buffer_free_buffer, - /* backend */ + /* backend */ - /* APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = */ backend_backend_graph_compute, + /* APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = */ backend_backend_graph_compute, }; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index 2921c760967..e4d054dad15 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -11,8 +11,11 @@ #include "shared/apir_cs.h" #include "shared/apir_cs_ggml.h" -typedef uint32_t (*backend_dispatch_t)(struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx); + +typedef uint32_t (*backend_dispatch_t)(struct apir_encoder * enc, + struct apir_decoder * dec, + struct virgl_apir_context * ctx); #include "backend-dispatched.gen.h" -uint32_t backend_dispatch_initialize(void *ggml_backend_reg_fct_p, void *ggml_backend_init_fct_p); +uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * 
ggml_backend_init_fct_p); diff --git a/ggml/src/ggml-remotingbackend/backend-internal.h b/ggml/src/ggml-remotingbackend/backend-internal.h index 0b9dcc72364..939d7d588e1 100644 --- a/ggml/src/ggml-remotingbackend/backend-internal.h +++ b/ggml/src/ggml-remotingbackend/backend-internal.h @@ -1,22 +1,24 @@ -#include -#include -#include - -#include "ggml-impl.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" +#include "ggml-impl.h" #include "shared/api_remoting.h" +#include +#include +#include + extern ggml_backend_reg_t reg; extern ggml_backend_dev_t dev; -extern ggml_backend_t bck; +extern ggml_backend_t bck; extern "C" { - ApirLoadLibraryReturnCode apir_backend_initialize(); - void apir_backend_deinit(void); - uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx, - char *dec_cur, const char *dec_end, - char *enc_cur, const char *enc_end, - char **enc_cur_after); +ApirLoadLibraryReturnCode apir_backend_initialize(); +void apir_backend_deinit(void); +uint32_t apir_backend_dispatcher(uint32_t cmd_type, + struct virgl_apir_context * ctx, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after); } - diff --git a/ggml/src/ggml-remotingbackend/backend-utils.h b/ggml/src/ggml-remotingbackend/backend-utils.h index d3abf3a2ace..95315e09937 100644 --- a/ggml/src/ggml-remotingbackend/backend-utils.h +++ b/ggml/src/ggml-remotingbackend/backend-utils.h @@ -1,61 +1,56 @@ #pragma once +#include + +#include #include #include -#include +#include -#include - -#define UNUSED GGML_UNUSED +#define UNUSED GGML_UNUSED #define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE" -static FILE * -get_log_dest(void) -{ - static FILE *dest = NULL; - if (dest) { - return dest; - } - const char *apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); - if (!apir_log_to_file) { - dest = stderr; - return dest; - } - - dest = fopen(apir_log_to_file, "w"); - - return dest; +static FILE * 
get_log_dest(void) { + static FILE * dest = NULL; + if (dest) { + return dest; + } + const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); + if (!apir_log_to_file) { + dest = stderr; + return dest; + } + + dest = fopen(apir_log_to_file, "w"); + + return dest; } -#define APIR_VA_PRINT(prefix, format) \ - do { \ - FILE *dest = get_log_dest(); \ - fprintf(dest, prefix); \ - va_list argptr; \ - va_start(argptr, format); \ - vfprintf(dest, format, argptr); \ - fprintf(dest, "\n"); \ - va_end(argptr); \ - fflush(dest); \ - } while (0) - -inline void -INFO(const char *format, ...) { - APIR_VA_PRINT("INFO: ", format); +#define APIR_VA_PRINT(prefix, format) \ + do { \ + FILE * dest = get_log_dest(); \ + fprintf(dest, prefix); \ + va_list argptr; \ + va_start(argptr, format); \ + vfprintf(dest, format, argptr); \ + fprintf(dest, "\n"); \ + va_end(argptr); \ + fflush(dest); \ + } while (0) + +inline void INFO(const char * format, ...) { + APIR_VA_PRINT("INFO: ", format); } -inline void -WARNING(const char *format, ...) { - APIR_VA_PRINT("WARNING: ", format); +inline void WARNING(const char * format, ...) { + APIR_VA_PRINT("WARNING: ", format); } -inline void -ERROR(const char *format, ...) { - APIR_VA_PRINT("ERROR: ", format); +inline void ERROR(const char * format, ...) { + APIR_VA_PRINT("ERROR: ", format); } -[[noreturn]] inline void -FATAL(const char *format, ...) { - APIR_VA_PRINT("FATAL: ", format); - abort(); +[[noreturn]] inline void FATAL(const char * format, ...) 
{ + APIR_VA_PRINT("FATAL: ", format); + abort(); } diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index 0b54b3e74b9..a9334d48073 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -1,34 +1,33 @@ -#include -#include - -#include - -#include "backend-utils.h" -#include "backend-internal.h" #include "backend-dispatched.h" - +#include "backend-internal.h" +#include "backend-utils.h" #include "shared/api_remoting.h" #include "shared/apir_backend.h" #include "shared/apir_cs.h" +#include +#include + +#include + #define GGML_BACKEND_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH" -#define GGML_BACKEND_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" +#define GGML_BACKEND_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" #define GGML_BACKEND_LIBRARY_INIT_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_INIT" -static void *backend_library_handle = NULL; +static void * backend_library_handle = NULL; extern "C" { - void apir_backend_deinit(void) { +void apir_backend_deinit(void) { auto buffers = apir_get_track_backend_buffers(); - for (const auto& buffer: buffers) { - apir_untrack_backend_buffer(buffer); - buffer->iface.free_buffer(buffer); + for (const auto & buffer : buffers) { + apir_untrack_backend_buffer(buffer); + buffer->iface.free_buffer(buffer); } if (dev) { - size_t free, total; - dev->iface.get_memory(dev, &free, &total); - INFO("%s: free memory: %ld MB", __func__, (size_t) free/1024/1024); + size_t free, total; + dev->iface.get_memory(dev, &free, &total); + INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); } show_timer(&graph_compute_timer); @@ -37,93 +36,94 @@ extern "C" { /* *** */ if (backend_library_handle) { - INFO("%s: The GGML backend library was loaded. Unloading it.", __func__); - dlclose(backend_library_handle); + INFO("%s: The GGML backend library was loaded. 
Unloading it.", __func__); + dlclose(backend_library_handle); } INFO("%s: bye-bye", __func__); - } +} - ApirLoadLibraryReturnCode apir_backend_initialize() { - const char* dlsym_error; +ApirLoadLibraryReturnCode apir_backend_initialize() { + const char * dlsym_error; - const char* library_name = getenv(GGML_BACKEND_LIBRARY_PATH_ENV); - const char* library_reg = getenv(GGML_BACKEND_LIBRARY_REG_ENV); - const char* library_init = getenv(GGML_BACKEND_LIBRARY_INIT_ENV); + const char * library_name = getenv(GGML_BACKEND_LIBRARY_PATH_ENV); + const char * library_reg = getenv(GGML_BACKEND_LIBRARY_REG_ENV); + const char * library_init = getenv(GGML_BACKEND_LIBRARY_INIT_ENV); INFO("%s: loading %s (%s|%s)", __func__, library_name, library_reg, library_init); if (!library_name) { - ERROR("cannot open the GGML library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV); + ERROR("cannot open the GGML library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV); - return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } backend_library_handle = dlopen(library_name, RTLD_LAZY); if (!backend_library_handle) { - ERROR("cannot open the GGML library: %s", dlerror()); + ERROR("cannot open the GGML library: %s", dlerror()); - return APIR_LOAD_LIBRARY_CANNOT_OPEN; + return APIR_LOAD_LIBRARY_CANNOT_OPEN; } if (!library_reg) { - ERROR("cannot register the GGML library: env var '%s' not defined", GGML_BACKEND_LIBRARY_REG_ENV); + ERROR("cannot register the GGML library: env var '%s' not defined", GGML_BACKEND_LIBRARY_REG_ENV); - return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } - void *ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); - dlsym_error = dlerror(); + void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); + dlsym_error = dlerror(); if (dlsym_error) { - ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", - library_reg, 
GGML_BACKEND_LIBRARY_REG_ENV, dlsym_error); + ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", library_reg, + GGML_BACKEND_LIBRARY_REG_ENV, dlsym_error); - return APIR_LOAD_LIBRARY_SYMBOL_MISSING; + return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } if (!library_init) { - ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init); + ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init); - return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; + return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } - void *ggml_backend_init_fct = dlsym(backend_library_handle, library_init); - dlsym_error = dlerror(); + void * ggml_backend_init_fct = dlsym(backend_library_handle, library_init); + dlsym_error = dlerror(); if (dlsym_error) { - ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", - library_init, GGML_BACKEND_LIBRARY_INIT_ENV, dlsym_error); + ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", library_init, + GGML_BACKEND_LIBRARY_INIT_ENV, dlsym_error); - return APIR_LOAD_LIBRARY_SYMBOL_MISSING; + return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct, ggml_backend_init_fct); return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret); - } +} - uint32_t apir_backend_dispatcher(uint32_t cmd_type, struct virgl_apir_context *ctx, - char *dec_cur, const char *dec_end, - char *enc_cur, const char *enc_end, - char **enc_cur_after) { +uint32_t apir_backend_dispatcher(uint32_t cmd_type, + struct virgl_apir_context * ctx, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after) { struct apir_encoder _enc = { - .cur = enc_cur, - .start = enc_cur, - .end = enc_end, + .cur = enc_cur, + .start = enc_cur, + .end = enc_end, }; - struct apir_encoder *enc = &_enc; + struct apir_encoder * enc = &_enc; struct apir_decoder _dec = { - .cur = dec_cur, - .end = dec_end, + .cur = dec_cur, + 
.end = dec_end, }; - struct apir_decoder *dec = &_dec; - + struct apir_decoder * dec = &_dec; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { - ERROR("Received an invalid dispatch index (%d >= %d)\n", - cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); - return APIR_BACKEND_FORWARD_INDEX_INVALID; + ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); + return APIR_BACKEND_FORWARD_INDEX_INVALID; } #if 0 @@ -132,10 +132,10 @@ extern "C" { count += 1; #endif backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type]; - uint32_t ret = forward_fct(enc, dec, ctx); + uint32_t ret = forward_fct(enc, dec, ctx); *enc_cur_after = enc->cur; return ret; - } +} } diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h b/ggml/src/ggml-remotingbackend/shared/api_remoting.h index c7e4ed25145..b3b831ffd5f 100644 --- a/ggml/src/ggml-remotingbackend/shared/api_remoting.h +++ b/ggml/src/ggml-remotingbackend/shared/api_remoting.h @@ -4,15 +4,17 @@ #include +#include + #define APIR_PROTOCOL_MAJOR 0 #define APIR_PROTOCOL_MINOR 1 #define APIR_HANDSHAKE_MAGIC 0xab1e typedef enum { - APIR_COMMAND_TYPE_HandShake = 0, + APIR_COMMAND_TYPE_HandShake = 0, APIR_COMMAND_TYPE_LoadLibrary = 1, - APIR_COMMAND_TYPE_Forward = 2, + APIR_COMMAND_TYPE_Forward = 2, APIR_COMMAND_TYPE_LENGTH = 3, } ApirCommandType; @@ -20,67 +22,69 @@ typedef enum { typedef uint64_t ApirCommandFlags; typedef enum { - APIR_LOAD_LIBRARY_SUCCESS = 0, + APIR_LOAD_LIBRARY_SUCCESS = 0, APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1, - APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, - APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, - APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, - APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, - APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code + APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, + APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, + APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, + APIR_LOAD_LIBRARY_SYMBOL_MISSING = 
5, + APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code } ApirLoadLibraryReturnCode; typedef enum { - APIR_FORWARD_SUCCESS = 0, + APIR_FORWARD_SUCCESS = 0, APIR_FORWARD_NO_DISPATCH_FCT = 1, - APIR_FORWARD_TIMEOUT = 2, + APIR_FORWARD_TIMEOUT = 2, - APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code + APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code } ApirForwardReturnCode; -__attribute__((unused)) -static inline const char *apir_command_name(ApirCommandType type) -{ - switch (type) { - case APIR_COMMAND_TYPE_HandShake: return "HandShake"; - case APIR_COMMAND_TYPE_LoadLibrary: return "LoadLibrary"; - case APIR_COMMAND_TYPE_Forward: return "Forward"; - default: return "unknown"; - } +__attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) { + switch (type) { + case APIR_COMMAND_TYPE_HandShake: + return "HandShake"; + case APIR_COMMAND_TYPE_LoadLibrary: + return "LoadLibrary"; + case APIR_COMMAND_TYPE_Forward: + return "Forward"; + default: + return "unknown"; + } } -__attribute__((unused)) -static const char *apir_load_library_error(ApirLoadLibraryReturnCode code) { +__attribute__((unused)) static const char * apir_load_library_error(ApirLoadLibraryReturnCode code) { #define APIR_LOAD_LIBRARY_ERROR(code_name) \ - do { \ - if (code == code_name) return #code_name; \ - } while (0) \ + do { \ + if (code == code_name) \ + return #code_name; \ + } while (0) - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS); - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR); - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED); - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING); - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN); - APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING); - 
APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SUCCESS); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ALREADY_LOADED); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_ENV_VAR_MISSING); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_CANNOT_OPEN); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_SYMBOL_MISSING); + APIR_LOAD_LIBRARY_ERROR(APIR_LOAD_LIBRARY_INIT_BASE_INDEX); - return "Unknown APIR_COMMAND_TYPE_LoadLibrary error"; + return "Unknown APIR_COMMAND_TYPE_LoadLibrary error"; #undef APIR_LOAD_LIBRARY_ERROR } -__attribute__((unused)) -static const char *apir_forward_error(ApirForwardReturnCode code) { +__attribute__((unused)) static const char * apir_forward_error(ApirForwardReturnCode code) { #define APIR_FORWARD_ERROR(code_name) \ - do { \ - if (code == code_name) return #code_name; \ - } while (0) \ + do { \ + if (code == code_name) \ + return #code_name; \ + } while (0) - APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS); - APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT); - APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); - APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); + APIR_FORWARD_ERROR(APIR_FORWARD_SUCCESS); + APIR_FORWARD_ERROR(APIR_FORWARD_NO_DISPATCH_FCT); + APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); + APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); - return "Unknown APIR_COMMAND_TYPE_Forward error"; + return "Unknown APIR_COMMAND_TYPE_Forward error"; #undef APIR_FORWARD_ERROR } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h index 572836db487..d214b6f2a90 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h @@ -1,36 +1,36 @@ typedef enum ApirBackendCommandType { - /* device */ - APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = 0, - APIR_COMMAND_TYPE_DEVICE_GET_COUNT = 1, - 
APIR_COMMAND_TYPE_DEVICE_GET_NAME = 2, - APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = 3, - APIR_COMMAND_TYPE_DEVICE_GET_TYPE = 4, - APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = 5, - APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = 6, - APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = 7, - APIR_COMMAND_TYPE_DEVICE_GET_PROPS = 8, - APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = 9, + /* device */ + APIR_COMMAND_TYPE_DEVICE_GET_DEVICE_COUNT = 0, + APIR_COMMAND_TYPE_DEVICE_GET_COUNT = 1, + APIR_COMMAND_TYPE_DEVICE_GET_NAME = 2, + APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION = 3, + APIR_COMMAND_TYPE_DEVICE_GET_TYPE = 4, + APIR_COMMAND_TYPE_DEVICE_GET_MEMORY = 5, + APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP = 6, + APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE = 7, + APIR_COMMAND_TYPE_DEVICE_GET_PROPS = 8, + APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR = 9, - /* buffer-type */ - APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = 10, - APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = 11, - APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = 12, - APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = 13, - APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = 14, - APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = 15, + /* buffer-type */ + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME = 10, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT = 11, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE = 12, + APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST = 13, + APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER = 14, + APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE = 15, - /* buffer */ - APIR_COMMAND_TYPE_BUFFER_GET_BASE = 16, - APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = 17, - APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = 18, - APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = 19, - APIR_COMMAND_TYPE_BUFFER_CLEAR = 20, - APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = 21, + /* buffer */ + APIR_COMMAND_TYPE_BUFFER_GET_BASE = 16, + APIR_COMMAND_TYPE_BUFFER_SET_TENSOR = 17, + APIR_COMMAND_TYPE_BUFFER_GET_TENSOR = 18, + APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR = 19, + APIR_COMMAND_TYPE_BUFFER_CLEAR = 20, + APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER = 21, 
- /* backend */ - APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = 22, + /* backend */ + APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE = 22, - // last command_type index + 1 - APIR_BACKEND_DISPATCH_TABLE_COUNT = 23, + // last command_type index + 1 + APIR_BACKEND_DISPATCH_TABLE_COUNT = 23, } ApirBackendCommandType; diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h index 5ba183439d8..4f690f6b6ae 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -1,15 +1,15 @@ #pragma once -#include // for struct timespec, clock_gettime -#include // for uintptr_t - #include "apir_backend.gen.h" -#define APIR_BACKEND_INITIALIZE_SUCCESS 0 +#include // for uintptr_t +#include // for struct timespec, clock_gettime + +#define APIR_BACKEND_INITIALIZE_SUCCESS 0 #define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1 -#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2 -#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3 -#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4 +#define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2 +#define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3 +#define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4 #define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5 // new entries here need to be added to the apir_backend_initialize_error function below @@ -25,20 +25,20 @@ typedef uintptr_t apir_buffer_host_handle_t; struct virgl_opaque_context; struct virgl_apir_callbacks { - void *(*get_shmem_ptr)(struct virgl_opaque_context *ctx, uint32_t res_id); + void * (*get_shmem_ptr)(struct virgl_opaque_context * ctx, uint32_t res_id); }; struct virgl_apir_context { - struct virgl_opaque_context *virgl_ctx; + struct virgl_opaque_context * virgl_ctx; - struct virgl_apir_callbacks iface; + struct virgl_apir_callbacks iface; }; struct timer_data { - long long start; - long long total; - long long count; - const char *name; + 
long long start; + long long total; + long long count; + const char * name; }; extern struct timer_data graph_compute_timer; @@ -49,52 +49,53 @@ extern struct timer_data wait_host_reply_timer; extern struct timer_data get_tensor_from_ptr_timer; extern struct timer_data set_tensor_from_ptr_timer; -static inline void start_timer(struct timer_data *timer) { - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - timer->start = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec; +static inline void start_timer(struct timer_data * timer) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; } // returns the duration in ns -static inline long long stop_timer(struct timer_data *timer) { - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - long long timer_end = (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec; +static inline long long stop_timer(struct timer_data * timer) { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; - long long duration = (timer_end - timer->start); - timer->total += duration; - timer->count += 1; + long long duration = (timer_end - timer->start); + timer->total += duration; + timer->count += 1; - return duration; + return duration; } -static inline void show_timer(struct timer_data *timer) { - double ms = timer->total/1000000; - double itl = ms/timer->count; - double speed = 1/itl * 1000; +static inline void show_timer(struct timer_data * timer) { + double ms = timer->total / 1000000; + double itl = ms / timer->count; + double speed = 1 / itl * 1000; - if (!timer->total) { - return; - } + if (!timer->total) { + return; + } - INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s (%4.2f ms/call)", - timer->name, ms, timer->count, itl, speed, ms/timer->count); + INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s 
(%4.2f ms/call)", timer->name, + ms, timer->count, itl, speed, ms / timer->count); } -static const char *apir_backend_initialize_error(int code) { +static const char * apir_backend_initialize_error(int code) { #define APIR_BACKEND_INITIALIZE_ERROR(code_name) \ - do { \ - if (code == code_name) return #code_name; \ - } while (0) \ - - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS); - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY); - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY); - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS); - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS); - APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED); - - return "Unknown APIR_BACKEND_INITIALIZE error:/"; + do { \ + if (code == code_name) \ + return #code_name; \ + } while (0) + + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_SUCCESS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS); + APIR_BACKEND_INITIALIZE_ERROR(APIR_BACKEND_INITIALIZE_BACKEND_FAILED); + + return "Unknown APIR_BACKEND_INITIALIZE error:/"; #undef APIR_BACKEND_INITIALIZE_ERROR } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-remotingbackend/shared/apir_cs.h index 22e954305ce..edb96d22777 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs.h @@ -10,134 +10,110 @@ #define unlikely(x) __builtin_expect(!!(x), 0) struct apir_encoder { - char* cur; - const char *start; - const char* end; + char * cur; + const char * start; + const char * end; }; struct apir_decoder { - const char* cur; - const 
char* end; + const char * cur; + const char * end; }; /* * new encoder and decoder */ -static struct apir_decoder -apir_new_decoder(const char *ptr, size_t size) { - struct apir_decoder dec = { - .cur = ptr, - .end = ptr + size, - }; +static struct apir_decoder apir_new_decoder(const char * ptr, size_t size) { + struct apir_decoder dec = { + .cur = ptr, + .end = ptr + size, + }; - return dec; + return dec; } -static struct apir_encoder -apir_new_encoder(char *ptr, size_t size) { - struct apir_encoder enc = { - .cur = ptr, - .start = ptr, - .end = ptr + size, - }; +static struct apir_encoder apir_new_encoder(char * ptr, size_t size) { + struct apir_encoder enc = { + .cur = ptr, + .start = ptr, + .end = ptr + size, + }; - return enc; + return enc; } /* * encode peek */ -static inline bool -apir_decoder_peek_internal(const struct apir_decoder *dec, - size_t size, - void *val, - size_t val_size) -{ - assert(val_size <= size); - - if (unlikely(size > (size_t) (dec->end - dec->cur))) { - FATAL("READING TOO MUCH FROM THE DECODER :/"); - //apir_decoder_set_fatal(dec); - memset(val, 0, val_size); - return false; - } +static inline bool apir_decoder_peek_internal(const struct apir_decoder * dec, + size_t size, + void * val, + size_t val_size) { + assert(val_size <= size); + + if (unlikely(size > (size_t) (dec->end - dec->cur))) { + FATAL("READING TOO MUCH FROM THE DECODER :/"); + //apir_decoder_set_fatal(dec); + memset(val, 0, val_size); + return false; + } - /* we should not rely on the compiler to optimize away memcpy... */ - memcpy(val, dec->cur, val_size); - return true; + /* we should not rely on the compiler to optimize away memcpy... 
*/ + memcpy(val, dec->cur, val_size); + return true; } -static inline void -apir_decoder_peek(const struct apir_decoder *dec, - size_t size, - void *val, - size_t val_size) -{ - apir_decoder_peek_internal(dec, size, val, val_size); +static inline void apir_decoder_peek(const struct apir_decoder * dec, size_t size, void * val, size_t val_size) { + apir_decoder_peek_internal(dec, size, val, val_size); } -static inline const void * -apir_decoder_use_inplace(struct apir_decoder *dec, - size_t size) -{ - if (unlikely(size > (size_t) (dec->end - dec->cur))) { - FATAL("READING TOO MUCH FROM THE DECODER :/"); - } - const void *addr = dec->cur; - dec->cur += size; +static inline const void * apir_decoder_use_inplace(struct apir_decoder * dec, size_t size) { + if (unlikely(size > (size_t) (dec->end - dec->cur))) { + FATAL("READING TOO MUCH FROM THE DECODER :/"); + return NULL; + } + const void * addr = dec->cur; + dec->cur += size; - return addr; + return addr; } /* * read/write */ -static inline void -apir_decoder_read(struct apir_decoder *dec, - size_t size, - void *val, - size_t val_size) -{ - if (apir_decoder_peek_internal(dec, size, val, val_size)) - dec->cur += size; +static inline void apir_decoder_read(struct apir_decoder * dec, size_t size, void * val, size_t val_size) { + if (apir_decoder_peek_internal(dec, size, val, val_size)) { + dec->cur += size; + } } -static inline char * -apir_encoder_write(struct apir_encoder *enc, - size_t size, - const void *val, - size_t val_size) -{ - assert(val_size <= size); - assert(size <= ((size_t) (enc->end - enc->cur))); +static inline char * apir_encoder_write(struct apir_encoder * enc, size_t size, const void * val, size_t val_size) { + assert(val_size <= size); + assert(size <= ((size_t) (enc->end - enc->cur))); - char *write_addr = enc->cur; - /* we should not rely on the compiler to optimize away memcpy... 
*/ - memcpy(write_addr, val, val_size); - enc->cur += size; + char * write_addr = enc->cur; + /* we should not rely on the compiler to optimize away memcpy... */ + memcpy(write_addr, val, val_size); + enc->cur += size; - return write_addr; + return write_addr; } /* * encode/decode */ -static inline void -apir_decode(struct apir_decoder *dec, size_t size, void *data, size_t data_size) -{ - assert(size % 4 == 0); - apir_decoder_read(dec, size, data, data_size); +static inline void apir_decode(struct apir_decoder * dec, size_t size, void * data, size_t data_size) { + assert(size % 4 == 0); + apir_decoder_read(dec, size, data, data_size); } -static inline void -apir_encode(struct apir_encoder *enc, size_t size, const void *data, size_t data_size) -{ - assert(size % 4 == 0); - apir_encoder_write(enc, size, data, data_size); +static inline void apir_encode(struct apir_encoder * enc, size_t size, const void * data, size_t data_size) { + assert(size % 4 == 0); + apir_encoder_write(enc, size, data, data_size); } /* @@ -146,305 +122,230 @@ apir_encode(struct apir_encoder *enc, size_t size, const void *data, size_t data /* uint8_t */ -static inline void -apir_encode_uint8_t(struct apir_encoder *enc, const uint8_t *val) -{ - apir_encode(enc, sizeof(int), val, sizeof(*val)); +static inline void apir_encode_uint8_t(struct apir_encoder * enc, const uint8_t * val) { + apir_encode(enc, sizeof(int), val, sizeof(*val)); } -static inline void -apir_decode_uint8_t(struct apir_decoder *dec, uint8_t *val) -{ - apir_decode(dec, sizeof(int), val, sizeof(*val)); +static inline void apir_decode_uint8_t(struct apir_decoder * dec, uint8_t * val) { + apir_decode(dec, sizeof(int), val, sizeof(*val)); } /* uint64_t */ -static inline void -apir_encode_uint64_t(struct apir_encoder *enc, const uint64_t *val) -{ - apir_encode(enc, 8, val, sizeof(*val)); +static inline void apir_encode_uint64_t(struct apir_encoder * enc, const uint64_t * val) { + apir_encode(enc, 8, val, sizeof(*val)); } -static 
inline void -apir_decode_uint64_t(struct apir_decoder *dec, uint64_t *val) -{ - apir_decode(dec, 8, val, sizeof(*val)); +static inline void apir_decode_uint64_t(struct apir_decoder * dec, uint64_t * val) { + apir_decode(dec, 8, val, sizeof(*val)); } -static inline void -apir_encode_uint64_t_array(struct apir_encoder *enc, const uint64_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_encode(enc, size, val, size); +static inline void apir_encode_uint64_t_array(struct apir_encoder * enc, const uint64_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); } -static inline void -apir_decode_uint64_t_array(struct apir_decoder *dec, uint64_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_decode(dec, size, val, size); +static inline void apir_decode_uint64_t_array(struct apir_decoder * dec, uint64_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, val, size); } -static inline const uint64_t * -apir_decode_uint64_t_array_inplace(struct apir_decoder *dec, uint32_t count) -{ - return (uint64_t *)(uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t)); +static inline const uint64_t * apir_decode_uint64_t_array_inplace(struct apir_decoder * dec, uint32_t count) { + return (uint64_t *) (uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t)); } /* int32_t */ -static inline void -apir_encode_int32_t(struct apir_encoder *enc, const int32_t *val) -{ - apir_encode(enc, 4, val, sizeof(*val)); +static inline void apir_encode_int32_t(struct apir_encoder * enc, const int32_t * val) { + apir_encode(enc, 4, val, sizeof(*val)); } -static inline void -apir_decode_int32_t(struct apir_decoder *dec, int32_t *val) -{ - apir_decode(dec, 4, val, sizeof(*val)); +static inline void apir_decode_int32_t(struct apir_decoder * 
dec, int32_t * val) { + apir_decode(dec, 4, val, sizeof(*val)); } -static inline void -apir_encode_int32_t_array(struct apir_encoder *enc, const int32_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_encode(enc, size, val, size); +static inline void apir_encode_int32_t_array(struct apir_encoder * enc, const int32_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); } -static inline void -apir_decode_int32_t_array(struct apir_decoder *dec, int32_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_decode(dec, size, val, size); +static inline void apir_decode_int32_t_array(struct apir_decoder * dec, int32_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, val, size); } /* array size (uint64_t) */ -static inline void -apir_encode_array_size(struct apir_encoder *enc, uint64_t size) -{ - apir_encode_uint64_t(enc, &size); +static inline void apir_encode_array_size(struct apir_encoder * enc, uint64_t size) { + apir_encode_uint64_t(enc, &size); } -static inline uint64_t -apir_decode_array_size(struct apir_decoder *dec, uint64_t expected_size) -{ - uint64_t size; - apir_decode_uint64_t(dec, &size); - if (size != expected_size) { - FATAL("ENCODER IS FULL :/"); - size = 0; - } - return size; +static inline uint64_t apir_decode_array_size(struct apir_decoder * dec, uint64_t expected_size) { + uint64_t size; + apir_decode_uint64_t(dec, &size); + if (size != expected_size) { + FATAL("ENCODER IS FULL :/"); + size = 0; + } + return size; } -static inline uint64_t -apir_decode_array_size_unchecked(struct apir_decoder *dec) -{ - uint64_t size; - apir_decode_uint64_t(dec, &size); - return size; +static inline uint64_t apir_decode_array_size_unchecked(struct apir_decoder * dec) { + uint64_t size; + apir_decode_uint64_t(dec, &size); + 
return size; } /* non-array pointer */ -static inline bool -apir_encode_simple_pointer(struct apir_encoder *enc, const void *val) -{ - apir_encode_array_size(enc, val ? 1 : 0); - return val; +static inline bool apir_encode_simple_pointer(struct apir_encoder * enc, const void * val) { + apir_encode_array_size(enc, val ? 1 : 0); + return val; } -static inline bool -apir_decode_simple_pointer(struct apir_decoder *dec) -{ - return apir_decode_array_size_unchecked(dec); +static inline bool apir_decode_simple_pointer(struct apir_decoder * dec) { + return apir_decode_array_size_unchecked(dec); } /* uint32_t */ -static inline void -apir_encode_uint32_t(struct apir_encoder *enc, const uint32_t *val) -{ - apir_encode(enc, 4, val, sizeof(*val)); +static inline void apir_encode_uint32_t(struct apir_encoder * enc, const uint32_t * val) { + apir_encode(enc, 4, val, sizeof(*val)); } -static inline void -apir_decode_uint32_t(struct apir_decoder *dec, uint32_t *val) -{ - apir_decode(dec, 4, val, sizeof(*val)); +static inline void apir_decode_uint32_t(struct apir_decoder * dec, uint32_t * val) { + apir_decode(dec, 4, val, sizeof(*val)); } -static inline void -apir_encode_uint32_t_array(struct apir_encoder *enc, const uint32_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_encode(enc, size, val, size); +static inline void apir_encode_uint32_t_array(struct apir_encoder * enc, const uint32_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_encode(enc, size, val, size); } -static inline void -apir_decode_uint32_t_array(struct apir_decoder *dec, uint32_t *val, uint32_t count) -{ - const size_t size = sizeof(*val) * count; - assert(size >= count); - apir_decode(dec, size, val, size); +static inline void apir_decode_uint32_t_array(struct apir_decoder * dec, uint32_t * val, uint32_t count) { + const size_t size = sizeof(*val) * count; + assert(size >= count); + apir_decode(dec, size, 
val, size); } /* size_t */ -static inline void -apir_encode_size_t(struct apir_encoder *enc, const size_t *val) -{ +static inline void apir_encode_size_t(struct apir_encoder * enc, const size_t * val) { const uint64_t tmp = *val; apir_encode_uint64_t(enc, &tmp); } -static inline void -apir_decode_size_t(struct apir_decoder *dec, size_t *val) -{ +static inline void apir_decode_size_t(struct apir_decoder * dec, size_t * val) { uint64_t tmp; apir_decode_uint64_t(dec, &tmp); *val = tmp; } -static inline void -apir_encode_size_t_array(struct apir_encoder *enc, const size_t *val, uint32_t count) -{ +static inline void apir_encode_size_t_array(struct apir_encoder * enc, const size_t * val, uint32_t count) { if (sizeof(size_t) == sizeof(uint64_t)) { - apir_encode_uint64_t_array(enc, (const uint64_t *)val, count); + apir_encode_uint64_t_array(enc, (const uint64_t *) val, count); } else { - for (uint32_t i = 0; i < count; i++) + for (uint32_t i = 0; i < count; i++) { apir_encode_size_t(enc, &val[i]); + } } } -static inline void -apir_decode_size_t_array(struct apir_decoder *dec, size_t *val, uint32_t count) -{ +static inline void apir_decode_size_t_array(struct apir_decoder * dec, size_t * val, uint32_t count) { if (sizeof(size_t) == sizeof(uint64_t)) { - apir_decode_uint64_t_array(dec, (uint64_t *)val, count); + apir_decode_uint64_t_array(dec, (uint64_t *) val, count); } else { - for (uint32_t i = 0; i < count; i++) + for (uint32_t i = 0; i < count; i++) { apir_decode_size_t(dec, &val[i]); + } } } /* opaque blob */ -static inline void -apir_encode_blob_array(struct apir_encoder *enc, const void *val, size_t size) -{ - apir_encode(enc, (size + 3) & ~3, val, size); +static inline void apir_encode_blob_array(struct apir_encoder * enc, const void * val, size_t size) { + apir_encode(enc, (size + 3) & ~3, val, size); } -static inline void -apir_decode_blob_array(struct apir_decoder *dec, void *val, size_t size) -{ - apir_decode(dec, (size + 3) & ~3, val, size); +static inline 
void apir_decode_blob_array(struct apir_decoder * dec, void * val, size_t size) { + apir_decode(dec, (size + 3) & ~3, val, size); } /* string */ -static inline void -apir_encode_char_array(struct apir_encoder *enc, const char *val, size_t size) -{ - assert(size && strlen(val) < size); - apir_encode_blob_array(enc, val, size); +static inline void apir_encode_char_array(struct apir_encoder * enc, const char * val, size_t size) { + assert(size && strlen(val) < size); + apir_encode_blob_array(enc, val, size); } -static inline void -apir_decode_char_array(struct apir_decoder *dec, char *val, size_t size) -{ - apir_decode_blob_array(dec, val, size); - if (size) - val[size - 1] = '\0'; - else { - //apir_decoder_set_fatal(dec); - FATAL("Couldn't decode the blog array"); - } +static inline void apir_decode_char_array(struct apir_decoder * dec, char * val, size_t size) { + apir_decode_blob_array(dec, val, size); + if (size) { + val[size - 1] = '\0'; + } else { + //apir_decoder_set_fatal(dec); + FATAL("Couldn't decode the blog array"); + } } /* (temp) buffer allocation */ -static inline void * -_apir_decoder_alloc_array(struct apir_decoder *dec, size_t size, size_t count) -{ - UNUSED(dec); - size_t alloc_size; - if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - FATAL("overflow in array allocation of %zu * %zu bytes", size, count); - return NULL; - } +static inline void * apir_decoder_alloc_array(struct apir_decoder * dec, size_t size, size_t count) { + UNUSED(dec); - return malloc(alloc_size); -} + size_t alloc_size; + if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { + FATAL("overflow in array allocation of %zu * %zu bytes", size, count); + return NULL; + } -static inline void * -apir_decoder_alloc_array(struct apir_decoder *dec, size_t size, size_t count) -{ - struct apir_decoder *d = (struct apir_decoder *)dec; - return _apir_decoder_alloc_array(d, size, count); + return malloc(alloc_size); } /* bool */ -static inline void 
-apir_encode_bool_t(struct apir_encoder *enc, const bool *val) -{ - apir_encode(enc, sizeof(int), val, sizeof(bool)); +static inline void apir_encode_bool_t(struct apir_encoder * enc, const bool * val) { + apir_encode(enc, sizeof(int), val, sizeof(bool)); } -static inline void -apir_decode_bool_t(struct apir_decoder *dec, bool *val) -{ - apir_decode(dec, sizeof(int), val, sizeof(bool)); +static inline void apir_decode_bool_t(struct apir_decoder * dec, bool * val) { + apir_decode(dec, sizeof(int), val, sizeof(bool)); } /* apir_buffer_type_host_handle_t */ -static inline void -apir_encode_apir_buffer_type_host_handle_t(struct apir_encoder *enc, const apir_buffer_type_host_handle_t *val) -{ - apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); +static inline void apir_encode_apir_buffer_type_host_handle_t(struct apir_encoder * enc, + const apir_buffer_type_host_handle_t * val) { + apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } -static inline void -apir_decode_apir_buffer_type_host_handle_t(struct apir_decoder *dec, apir_buffer_type_host_handle_t *val) -{ - apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); +static inline void apir_decode_apir_buffer_type_host_handle_t(struct apir_decoder * dec, + apir_buffer_type_host_handle_t * val) { + apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } /* apir_buffer_host_handle_t */ -static inline void -apir_encode_apir_buffer_host_handle_t(struct apir_encoder *enc, const apir_buffer_host_handle_t *val) -{ - apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); +static inline void apir_encode_apir_buffer_host_handle_t(struct apir_encoder * enc, + const apir_buffer_host_handle_t * val) { + apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); } 
-static inline void -apir_decode_apir_buffer_host_handle_t(struct apir_decoder *dec, apir_buffer_host_handle_t *val) -{ - apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); +static inline void apir_decode_apir_buffer_host_handle_t(struct apir_decoder * dec, apir_buffer_host_handle_t * val) { + apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); } /* uintptr_t */ -static inline void -apir_encode_uintptr_t(struct apir_encoder *enc, const uintptr_t *val) -{ - apir_encode(enc, sizeof(*val), val, sizeof(*val)); +static inline void apir_encode_uintptr_t(struct apir_encoder * enc, const uintptr_t * val) { + apir_encode(enc, sizeof(*val), val, sizeof(*val)); } -static inline void -apir_decode_uintptr_t(struct apir_decoder *dec, uintptr_t *val) -{ - apir_decode(dec, sizeof(*val), val, sizeof(*val)); +static inline void apir_decode_uintptr_t(struct apir_decoder * dec, uintptr_t * val) { + apir_decode(dec, sizeof(*val), val, sizeof(*val)); } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h index afc551d76d3..497b91bb516 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -5,54 +5,49 @@ // ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer); -static inline void -apir_encode_ggml_buffer_host_handle(struct apir_encoder *enc, const apir_buffer_host_handle_t *handle); +static inline void apir_encode_ggml_buffer_host_handle(struct apir_encoder * enc, + const apir_buffer_host_handle_t * handle); -static inline ggml_backend_buffer_t -apir_decode_ggml_buffer(struct apir_decoder *dec); +static inline ggml_backend_buffer_t apir_decode_ggml_buffer(struct apir_decoder * dec); /* apir_rpc_tensor */ -static inline void -apir_encode_rcp_tensor(struct apir_encoder *enc, const apir_rpc_tensor *apir_rpc_tensor) { +static inline void apir_encode_rcp_tensor(struct 
apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) { size_t apir_rpc_tensor_size = sizeof(*apir_rpc_tensor); apir_encode(enc, apir_rpc_tensor_size, apir_rpc_tensor, apir_rpc_tensor_size); } -static inline apir_rpc_tensor * -apir_decode_apir_rpc_tensor_inplace(struct apir_decoder *dec) { +static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(struct apir_decoder * dec) { size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor); - return (apir_rpc_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); + return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); } -static inline apir_rpc_tensor * -apir_decode_apir_rpc_tensor_array_inplace(struct apir_decoder *dec, uint32_t n_tensors) { +static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(struct apir_decoder * dec, + uint32_t n_tensors) { size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors; - return (apir_rpc_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); + return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); } /* ggml_tensor */ -static inline void -apir_encode_ggml_tensor(struct apir_encoder *enc, const ggml_tensor *tensor) { +static inline void apir_encode_ggml_tensor(struct apir_encoder * enc, const ggml_tensor * tensor) { apir_rpc_tensor serialized = apir_serialize_tensor(tensor); apir_encode_rcp_tensor(enc, &serialized); } -static inline const ggml_tensor * -apir_decode_ggml_tensor(struct apir_decoder *dec) { - const apir_rpc_tensor *apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); - struct ggml_init_params params { - /*.mem_size =*/ ggml_tensor_overhead(), - /*.mem_buffer =*/ NULL, - /*.no_alloc =*/ true, - }; +static inline const ggml_tensor * apir_decode_ggml_tensor(struct apir_decoder * dec) { + const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); + struct ggml_init_params params{ + /*.mem_size 
=*/ggml_tensor_overhead(), + /*.mem_buffer =*/NULL, + /*.no_alloc =*/true, + }; struct ggml_context * ctx = ggml_init(params); - const ggml_tensor *tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); + const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); return tensor; } @@ -64,15 +59,12 @@ apir_decode_ggml_tensor(struct apir_decoder *dec) { // The guest stores it in `buft->context`. // The host simply writes the pointer address in the buffer variable. - -static inline void -apir_encode_ggml_buffer_type(struct apir_encoder *enc, ggml_backend_buffer_type_t buft) { +static inline void apir_encode_ggml_buffer_type(struct apir_encoder * enc, ggml_backend_buffer_type_t buft) { apir_buffer_type_host_handle_t handle = ggml_buffer_type_to_apir_handle(buft); apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); } -static inline ggml_backend_buffer_type_t -apir_decode_ggml_buffer_type(struct apir_decoder *dec) { +static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(struct apir_decoder * dec) { apir_buffer_type_host_handle_t handle; apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); @@ -80,8 +72,7 @@ apir_decode_ggml_buffer_type(struct apir_decoder *dec) { return (ggml_backend_buffer_type_t) handle; } -static inline apir_buffer_type_host_handle_t -apir_decode_apir_buffer_type_host_handle(struct apir_decoder *dec) { +static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(struct apir_decoder * dec) { apir_buffer_type_host_handle_t handle; apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); @@ -94,16 +85,14 @@ apir_decode_apir_buffer_type_host_handle(struct apir_decoder *dec) { // ggml_backend_buffer_t is a POINTER. 
// same logic as for ggml_backend_buffer_type_t -static inline void -apir_encode_ggml_buffer(struct apir_encoder *enc, const ggml_backend_buffer_t buffer) { +static inline void apir_encode_ggml_buffer(struct apir_encoder * enc, const ggml_backend_buffer_t buffer) { apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer); apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); } -static inline ggml_backend_buffer_t -apir_decode_ggml_buffer(struct apir_decoder *dec) { +static inline ggml_backend_buffer_t apir_decode_ggml_buffer(struct apir_decoder * dec) { ggml_backend_buffer_t buffer; - size_t buffer_ptr_size = sizeof(buffer); + size_t buffer_ptr_size = sizeof(buffer); apir_decoder_read(dec, buffer_ptr_size, &buffer, buffer_ptr_size); @@ -112,46 +101,39 @@ apir_decode_ggml_buffer(struct apir_decoder *dec) { /* enum ggml_status */ -static inline void -apir_encode_ggml_status(struct apir_encoder *enc, const enum ggml_status *status) { +static inline void apir_encode_ggml_status(struct apir_encoder * enc, const enum ggml_status * status) { apir_encoder_write(enc, sizeof(*status), status, sizeof(*status)); } -static inline void -apir_decode_ggml_status(struct apir_decoder *dec, enum ggml_status *status) { +static inline void apir_decode_ggml_status(struct apir_decoder * dec, enum ggml_status * status) { apir_decoder_read(dec, sizeof(*status), status, sizeof(*status)); } /* virtgpu_shmem */ -static inline void -apir_encode_virtgpu_shmem_res_id(struct apir_encoder *enc, uint32_t shmem_res_id) { +static inline void apir_encode_virtgpu_shmem_res_id(struct apir_encoder * enc, uint32_t shmem_res_id) { apir_encode_uint32_t(enc, &shmem_res_id); } -static inline void -apir_decode_virtgpu_shmem_res_id(struct apir_decoder *dec, uint32_t *shmem_res_id) { +static inline void apir_decode_virtgpu_shmem_res_id(struct apir_decoder * dec, uint32_t * shmem_res_id) { apir_decode_uint32_t(dec, shmem_res_id); } /* ggml_cgraph */ -static inline size_t 
-apir_serialize_ggml_cgraph(ggml_cgraph *cgraph, std::vector & cgraph_data) { +static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vector & cgraph_data) { apir_serialize_graph(cgraph, cgraph_data); return cgraph_data.size(); } -static inline void -apir_encode_cgraph_data(struct apir_encoder *enc, std::vector & cgraph_data) { +static inline void apir_encode_cgraph_data(struct apir_encoder * enc, std::vector & cgraph_data) { size_t cgraph_size = cgraph_data.size(); apir_encode(enc, cgraph_size, cgraph_data.data(), cgraph_size); } -static inline ggml_cgraph * -apir_decode_ggml_cgraph(struct apir_decoder *dec, size_t cgraph_size) { +static inline ggml_cgraph * apir_decode_ggml_cgraph(struct apir_decoder * dec, size_t cgraph_size) { UNUSED(cgraph_size); uint32_t n_nodes; @@ -160,18 +142,16 @@ apir_decode_ggml_cgraph(struct apir_decoder *dec, size_t cgraph_size) { uint32_t n_tensors; apir_decode_uint32_t(dec, &n_tensors); - const apir_rpc_tensor *tensors = apir_decode_apir_rpc_tensor_array_inplace(dec, n_tensors); + const apir_rpc_tensor * tensors = apir_decode_apir_rpc_tensor_array_inplace(dec, n_tensors); return apir_deserialize_graph(n_nodes, n_tensors, tensors, nodes); } -static inline void -apir_encode_ggml_buffer_handle(struct apir_encoder *enc, const apir_buffer_host_handle_t *handle) { +static inline void apir_encode_ggml_buffer_handle(struct apir_encoder * enc, const apir_buffer_host_handle_t * handle) { apir_encoder_write(enc, sizeof(*handle), &handle, sizeof(*handle)); } -static inline void -apir_encode_ggml_tensor_inline(struct apir_encoder *enc, const ggml_tensor *tensor) { +static inline void apir_encode_ggml_tensor_inline(struct apir_encoder * enc, const ggml_tensor * tensor) { size_t tensor_size = sizeof(*tensor); if (tensor->extra) { @@ -202,17 +182,15 @@ apir_encode_ggml_tensor_inline(struct apir_encoder *enc, const ggml_tensor *tens } for (int i = 0; tensor->src[i]; i++) { - const ggml_tensor *tensor_src = tensor->src[i]; + const 
ggml_tensor * tensor_src = tensor->src[i]; apir_encoder_write(enc, tensor_size, tensor_src, tensor_size); } } -static inline const ggml_tensor * -apir_decode_ggml_tensor_inplace(struct apir_decoder *dec) { - +static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(struct apir_decoder * dec) { // it safe to remove the `const` qualifier here, we *do* want to // modify the shared memory data to fix the `src` pointers. - ggml_tensor *tensor = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); // tensor->data is a pointer inside the device buffer. No need to touch it // tensor->buffer is a pointer to a buffer. Decode the buffer handle encoded in sequence. @@ -221,13 +199,13 @@ apir_decode_ggml_tensor_inplace(struct apir_decoder *dec) { } if (tensor->view_src) { - ggml_tensor *tensor_view_src = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); - tensor->view_src = tensor_view_src; + ggml_tensor * tensor_view_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + tensor->view_src = tensor_view_src; } for (int i = 0; tensor->src[i]; i++) { - ggml_tensor *tensor_src = (ggml_tensor *)(uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); - tensor->src[i] = tensor_src; // overwrite op->src[i] pointer with the actual location of the src tensor + ggml_tensor * tensor_src = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); + tensor->src[i] = tensor_src; // overwrite op->src[i] pointer with the actual location of the src tensor } return tensor; diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h index a92b28317cd..e40d9e8cdce 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h @@ -1,24 +1,28 @@ -#include 
-#include +#include "ggml.h" +#include "ggml-backend-impl.h" + #include +#include +#include +#include // ggml_tensor is serialized into apir_rpc_tensor struct apir_rpc_tensor { - uint64_t id; - uint32_t type; - uint64_t buffer; - uint32_t ne[GGML_MAX_DIMS]; - uint32_t nb[GGML_MAX_DIMS]; - uint32_t op; - int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; - int32_t flags; - uint64_t src[GGML_MAX_SRC]; - uint64_t view_src; - uint64_t view_offs; - uint64_t data; - char name[GGML_MAX_NAME]; - - char padding[4]; + uint64_t id; + uint32_t type; + uint64_t buffer; + uint32_t ne[GGML_MAX_DIMS]; + uint32_t nb[GGML_MAX_DIMS]; + uint32_t op; + int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)]; + int32_t flags; + uint64_t src[GGML_MAX_SRC]; + uint64_t view_src; + uint64_t view_offs; + uint64_t data; + char name[GGML_MAX_NAME]; + + char padding[4]; }; /* frontend */ @@ -29,17 +33,22 @@ void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector & out /* backend */ -void apir_track_backend_buffer(ggml_backend_buffer_t buffer); -bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer); +void apir_track_backend_buffer(ggml_backend_buffer_t buffer); +bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer); std::unordered_set apir_get_track_backend_buffers(); -void apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, std::unordered_set & visited); +void apir_add_tensor(ggml_tensor * tensor, + std::vector & tensors, + std::unordered_set & visited); -ggml_tensor *apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor); +ggml_tensor * apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor); -ggml_tensor *apir_create_node(uint64_t id, - struct ggml_context * ctx, - const std::unordered_map & tensor_ptrs, - std::unordered_map & tensor_map); +ggml_tensor * apir_create_node(uint64_t id, + struct ggml_context * ctx, + const std::unordered_map & tensor_ptrs, + std::unordered_map & tensor_map); 
-ggml_cgraph *apir_deserialize_graph(uint32_t n_nodes, uint32_t n_tensors, const apir_rpc_tensor * tensors, const uint64_t * nodes); +ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes, + uint32_t n_tensors, + const apir_rpc_tensor * tensors, + const uint64_t * nodes); diff --git a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp index a338e3cc9e1..c6e0c522782 100644 --- a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp +++ b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp @@ -1,18 +1,16 @@ -#include -#include -#include -#include - -#include "ggml-impl.h" -#include "ggml-backend-impl.h" #include "../ggml-remotingbackend/shared/apir_cs_rpc.h" - +#include "ggml-backend-impl.h" +#include "ggml-impl.h" #include "ggml-remoting.h" -apir_rpc_tensor -apir_serialize_tensor(const ggml_tensor * tensor) { +#include +#include +#include +#include + +apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) { apir_rpc_tensor result; - result.id = reinterpret_cast(tensor); + result.id = reinterpret_cast(tensor); result.type = tensor->type; if (tensor->buffer) { ggml_backend_buffer_t buffer = tensor->buffer; @@ -33,9 +31,9 @@ apir_serialize_tensor(const ggml_tensor * tensor) { for (uint32_t i = 0; i < GGML_MAX_SRC; i++) { result.src[i] = reinterpret_cast(tensor->src[i]); } - result.view_src = reinterpret_cast(tensor->view_src); + result.view_src = reinterpret_cast(tensor->view_src); result.view_offs = tensor->view_offs; - result.data = reinterpret_cast(tensor->data); + result.data = reinterpret_cast(tensor->data); if (tensor->data) { if (!tensor->buffer) { FATAL("tensor has data but not buffer :/"); @@ -47,8 +45,9 @@ apir_serialize_tensor(const ggml_tensor * tensor) { return result; } -void -apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, std::unordered_set & visited) { +void apir_add_tensor(ggml_tensor * tensor, + std::vector & tensors, + std::unordered_set & visited) { 
if (tensor == nullptr) { return; } @@ -63,25 +62,26 @@ apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, st tensors.push_back(apir_serialize_tensor(tensor)); } -void -apir_serialize_graph(const ggml_cgraph * cgraph, std::vector & output) { - uint32_t n_nodes = cgraph->n_nodes; - std::vector tensors; - std::unordered_set visited; +void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector & output) { + uint32_t n_nodes = cgraph->n_nodes; + std::vector tensors; + std::unordered_set visited; for (uint32_t i = 0; i < n_nodes; i++) { apir_add_tensor(cgraph->nodes[i], tensors, visited); } // serialization format: // | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) | uint32_t n_tensors = tensors.size(); - int output_size = sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor); + int output_size = + sizeof(uint32_t) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t) + n_tensors * sizeof(apir_rpc_tensor); output.resize(output_size, 0); memcpy(output.data(), &n_nodes, sizeof(n_nodes)); for (uint32_t i = 0; i < n_nodes; i++) { memcpy(output.data() + sizeof(n_nodes) + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t)); } - uint32_t * out_ntensors = (uint32_t *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t)); - *out_ntensors = n_tensors; - apir_rpc_tensor * out_tensors = (apir_rpc_tensor *)(output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t)); + uint32_t * out_ntensors = (uint32_t *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t)); + *out_ntensors = n_tensors; + apir_rpc_tensor * out_tensors = + (apir_rpc_tensor *) (output.data() + sizeof(n_nodes) + n_nodes * sizeof(uint64_t) + sizeof(uint32_t)); memcpy(out_tensors, tensors.data(), n_tensors * sizeof(apir_rpc_tensor)); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp 
b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp index 7af60209f9c..5e696ca49e7 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp @@ -1,10 +1,11 @@ #include "ggml-remoting.h" -static ggml_backend_buffer_t -ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, + size_t size) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); - struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + struct ggml_backend_remoting_buffer_context * context = + (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { FATAL("Couldn't allocate the buffer context ..."); } @@ -13,38 +14,32 @@ ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, bool async__unused, host_buffer__unused, events__unused; bool buffer_from_host_ptr; - apir_device_get_props(gpu, - &async__unused, - &host_buffer__unused, - &buffer_from_host_ptr, - &events__unused - ); + apir_device_get_props(gpu, &async__unused, &host_buffer__unused, &buffer_from_host_ptr, &events__unused); if (buffer_from_host_ptr) { context->apir_context = apir_device_buffer_from_ptr(gpu, size, size); - context->base = context->apir_context.shmem.mmap_ptr; - context->is_from_ptr = true; + context->base = context->apir_context.shmem.mmap_ptr; + context->is_from_ptr = true; } else { context->apir_context = apir_buffer_type_alloc_buffer(gpu, buft, size); - context->is_from_ptr = false; - context->base = NULL; + context->is_from_ptr = false; + context->base = NULL; } - ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft, ggml_backend_remoting_buffer_interface, (void *) context, size); + ggml_backend_buffer_t buffer = + 
ggml_backend_buffer_init(buft, ggml_backend_remoting_buffer_interface, (void *) context, size); return buffer; } -static const char * -ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); return apir_buffer_type_get_name(gpu, buft); } -static size_t -ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); static size_t align = 0; @@ -55,9 +50,8 @@ ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) return align; } -static size_t -ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); static size_t max_size = 0; if (max_size == 0) { @@ -67,16 +61,15 @@ ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) return max_size; } -static bool -ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); return apir_buffer_type_is_host(gpu, buft); } -static size_t -ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { - struct virtgpu *gpu = BUFT_TO_GPU(buft); +static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, + const ggml_tensor * tensor) { + struct virtgpu * gpu = BUFT_TO_GPU(buft); return 
apir_buffer_type_get_alloc_size(gpu, buft, tensor); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp index 87c34d4a188..217a81f878b 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp @@ -1,33 +1,36 @@ #include "ggml-remoting.h" -#define BUFFER_TO_GPU(name) \ - ((struct ggml_backend_remoting_buffer_context *) (name)->context)->gpu +#define BUFFER_TO_GPU(name) ((struct ggml_backend_remoting_buffer_context *) (name)->context)->gpu -struct timer_data get_tensor_timer = {0, 0, 0, "get_tensor"}; -struct timer_data set_tensor_timer = {0, 0, 0, "set_tensor"}; -struct timer_data cpy_tensor_timer = {0, 0, 0, "cpy_tensor"}; +struct timer_data get_tensor_timer = { 0, 0, 0, "get_tensor" }; +struct timer_data set_tensor_timer = { 0, 0, 0, "set_tensor" }; +struct timer_data cpy_tensor_timer = { 0, 0, 0, "cpy_tensor" }; -struct timer_data get_tensor_from_ptr_timer = {0, 0, 0, "get_tensor_from_ptr"}; -struct timer_data set_tensor_from_ptr_timer = {0, 0, 0, "set_tensor_from_ptr"}; +struct timer_data get_tensor_from_ptr_timer = { 0, 0, 0, "get_tensor_from_ptr" }; +struct timer_data set_tensor_from_ptr_timer = { 0, 0, 0, "set_tensor_from_ptr" }; static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) { - struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) buffer->context; + struct ggml_backend_remoting_buffer_context * context = + (struct ggml_backend_remoting_buffer_context *) buffer->context; if (context->base) { return context->base; } - context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), - BUFFER_TO_APIR_CONTEXT(buffer)); + context->base = apir_buffer_get_base(BUFFER_TO_GPU(buffer), BUFFER_TO_APIR_CONTEXT(buffer)); return context->base; } -static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, 
const void * data, size_t offset, size_t size) { - struct virtgpu *gpu = BUFFER_TO_GPU(buffer); +static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer, + ggml_tensor * tensor, + const void * data, + size_t offset, + size_t size) { + struct virtgpu * gpu = BUFFER_TO_GPU(buffer); - struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); + struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); if (context->is_from_ptr) { - memcpy((char *)tensor->data + offset, data, size); + memcpy((char *) tensor->data + offset, data, size); } else { apir_buffer_set_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size); } @@ -35,32 +38,46 @@ static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer return; } -static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { - struct virtgpu *gpu = BUFFER_TO_GPU(buffer); - struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); +static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer, + const ggml_tensor * tensor, + void * data, + size_t offset, + size_t size) { + struct virtgpu * gpu = BUFFER_TO_GPU(buffer); + struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); if (context->is_from_ptr) { - memcpy(data, (const char *)tensor->data + offset, size); + memcpy(data, (const char *) tensor->data + offset, size); } else { apir_buffer_get_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), tensor, data, offset, size); } } -static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { +static void ggml_backend_remoting_buffer_set_tensor_from_ptr(ggml_backend_buffer_t buffer, + ggml_tensor * tensor, + const void * data, + size_t offset, + size_t 
size) { UNUSED(buffer); - memcpy((char *)tensor->data + offset, data, size); + memcpy((char *) tensor->data + offset, data, size); return; } -static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { +static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer_t buffer, + const ggml_tensor * tensor, + void * data, + size_t offset, + size_t size) { UNUSED(buffer); - memcpy(data, (const char *)tensor->data + offset, size); + memcpy(data, (const char *) tensor->data + offset, size); } -static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { - struct virtgpu *gpu = BUFFER_TO_GPU(buffer); +static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer, + const ggml_tensor * src, + ggml_tensor * dst) { + struct virtgpu * gpu = BUFFER_TO_GPU(buffer); bool ret = apir_buffer_cpy_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst); @@ -68,7 +85,7 @@ static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer } static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { - struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + struct virtgpu * gpu = BUFFER_TO_GPU(buffer); apir_buffer_clear(gpu, BUFFER_TO_APIR_CONTEXT(buffer), value); @@ -76,11 +93,11 @@ static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uin } static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) { - struct virtgpu *gpu = BUFFER_TO_GPU(buffer); + struct virtgpu * gpu = BUFFER_TO_GPU(buffer); apir_buffer_free_buffer(gpu, BUFFER_TO_APIR_CONTEXT(buffer)); - struct ggml_backend_remoting_buffer_context *context = BUFFER_TO_GGML_CONTEXT(buffer); + struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); free(context); buffer->context = NULL; } diff --git 
a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp index 5be945b558c..b8440ceb202 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp @@ -1,42 +1,37 @@ #include "ggml-remoting.h" -static const char * -ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +static const char * ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { + struct virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_name(gpu); } -static const char * -ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { + struct virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_description(gpu); } -static enum ggml_backend_dev_type -ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { + struct virtgpu * gpu = DEV_TO_GPU(dev); static enum ggml_backend_dev_type type; - static bool has_type = false; + static bool has_type = false; if (!has_type) { has_type = true; - type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); + type = (enum ggml_backend_dev_type) apir_device_get_type(gpu); } return type; } -static void -ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + struct virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_memory(gpu, free, total); } -static bool -ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { +static bool 
ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { #if USE_ALWAYS_TRUE_SUPPORTS_OP == 1 /* ggml-rpc cheats it like this */ /* with the current implementation of serialize_tensor, the src/view aren't properly passed */ @@ -45,54 +40,46 @@ ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, const ggml_tens return true; #else - struct virtgpu *gpu = DEV_TO_GPU(dev); + struct virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_supports_op(gpu, op); #endif } -static bool -ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { +static bool ggml_backend_remoting_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { bool supported = buft->device == dev; return supported; } -static bool -ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { +static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { UNUSED(dev); UNUSED(op); return false; } -static void -ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { +static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { props->name = ggml_backend_remoting_device_get_name(dev); props->description = ggml_backend_remoting_device_get_description(dev); props->type = ggml_backend_remoting_device_get_type(dev); ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total); - struct virtgpu *gpu = DEV_TO_GPU(dev); - apir_device_get_props(gpu, - &props->caps.async, - &props->caps.host_buffer, - &props->caps.buffer_from_host_ptr, - &props->caps.events - ); + struct virtgpu * gpu = DEV_TO_GPU(dev); + apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, &props->caps.buffer_from_host_ptr, + &props->caps.events); props->caps.buffer_from_host_ptr = false; - props->caps.async = false; - 
props->caps.events = false; + props->caps.async = false; + props->caps.events = false; } -ggml_backend_buffer_type_t -ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { + struct virtgpu * gpu = DEV_TO_GPU(dev); apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); - static struct ggml_backend_buffer_type buft { + static struct ggml_backend_buffer_type buft{ /* .iface = */ ggml_backend_remoting_buffer_type_interface, /* .device = */ dev, /* .context = */ (void *) ctx, @@ -101,13 +88,12 @@ ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { return &buft; } -static ggml_backend_buffer_type_t -ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { - struct virtgpu *gpu = DEV_TO_GPU(dev); +static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { + struct virtgpu * gpu = DEV_TO_GPU(dev); apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); - static struct ggml_backend_buffer_type buft { + static struct ggml_backend_buffer_type buft{ /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, /* .device = */ dev, /* .context = */ (void *) ctx, @@ -116,22 +102,26 @@ ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { return &buft; } -static ggml_backend_buffer_t -ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) { +static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_backend_dev_t dev, + void * ptr, + size_t size, + size_t max_tensor_size) { + struct virtgpu * gpu = DEV_TO_GPU(dev); - struct virtgpu *gpu = DEV_TO_GPU(dev); - - struct ggml_backend_remoting_buffer_context *context = (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); 
+ struct ggml_backend_remoting_buffer_context * context = + (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { FATAL("Couldn't allocate the buffer context ..."); } - context->gpu = gpu; + context->gpu = gpu; context->apir_context = apir_device_buffer_from_ptr(gpu, size, max_tensor_size); - context->base = ptr; - context->is_from_ptr = true; + context->base = ptr; + context->is_from_ptr = true; - ggml_backend_buffer_t buffer = ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev), ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size); + ggml_backend_buffer_t buffer = + ggml_backend_buffer_init(ggml_backend_remoting_device_get_buffer_from_ptr_type(dev), + ggml_backend_remoting_buffer_from_ptr_interface, (void *) context, size); return buffer; } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index 31527eac6cf..663f26e20bd 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -1,11 +1,11 @@ -#include -#include - #include "ggml-remoting.h" -static struct virtgpu *apir_initialize() { - static struct virtgpu *apir_gpu_instance = NULL; - static bool apir_initialized = false; +#include +#include + +static struct virtgpu * apir_initialize() { + static struct virtgpu * apir_gpu_instance = NULL; + static bool apir_initialized = false; if (apir_initialized) { return apir_gpu_instance; @@ -22,7 +22,7 @@ static struct virtgpu *apir_initialize() { } static int ggml_backend_remoting_get_device_count() { - struct virtgpu *gpu = apir_initialize(); + struct virtgpu * gpu = apir_initialize(); if (!gpu) { WARNING("apir_initialize failed :/"); return 0; @@ -50,7 +50,7 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { return; } - struct virtgpu *gpu = apir_initialize(); + struct virtgpu * gpu = apir_initialize(); if (!gpu) { 
FATAL("apir_initialize failed :/"); return; @@ -59,20 +59,19 @@ static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { static bool initialized = false; { - static std::mutex mutex; + static std::mutex mutex; std::lock_guard lock(mutex); if (!initialized) { - for (int i = 0; i < ggml_backend_remoting_get_device_count(); i++) { - ggml_backend_remoting_device_context *ctx = new ggml_backend_remoting_device_context; - char desc[256] = "API Remoting device"; + ggml_backend_remoting_device_context * ctx = new ggml_backend_remoting_device_context; + char desc[256] = "API Remoting device"; - ctx->device = i; - ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i); + ctx->device = i; + ctx->name = GGML_REMOTING_FRONTEND_NAME + std::to_string(i); ctx->description = desc; - ctx->gpu = gpu; + ctx->gpu = gpu; - ggml_backend_dev_t dev = new ggml_backend_device { + ggml_backend_dev_t dev = new ggml_backend_device{ /* .iface = */ ggml_backend_remoting_device_interface, /* .reg = */ reg, /* .context = */ ctx, @@ -90,7 +89,7 @@ static ggml_backend_dev_t ggml_backend_remoting_reg_get_device(ggml_backend_reg_ return ggml_backend_remoting_get_device(device); } -static const char *ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) { +static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t reg) { UNUSED(reg); return GGML_REMOTING_FRONTEND_NAME; @@ -103,7 +102,6 @@ static const struct ggml_backend_reg_i ggml_backend_remoting_reg_i = { /* .get_proc_address = */ NULL, }; - static void showTime() { show_timer(&graph_compute_timer); show_timer(&get_tensor_timer); @@ -121,7 +119,7 @@ static void showTime() { } ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { - struct virtgpu *gpu = apir_initialize(); + struct virtgpu * gpu = apir_initialize(); if (!gpu) { FATAL("apir_initialize failed :/"); return NULL; diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp index 
6c6495ac909..e400be2af0d 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp @@ -1,31 +1,31 @@ #include "ggml-remoting.h" static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) { - UNUSED(backend); + UNUSED(backend); - return "API Remoting backend"; + return "API Remoting backend"; } static void ggml_backend_remoting_free(ggml_backend_t backend) { - delete backend; + delete backend; } -struct timer_data graph_compute_timer = {0, 0, 0, "compute_timer"}; +struct timer_data graph_compute_timer = { 0, 0, 0, "compute_timer" }; static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { - struct virtgpu *gpu = DEV_TO_GPU(backend->device); + struct virtgpu * gpu = DEV_TO_GPU(backend->device); - start_timer(&graph_compute_timer); + start_timer(&graph_compute_timer); - ggml_status status = apir_backend_graph_compute(gpu, cgraph); + ggml_status status = apir_backend_graph_compute(gpu, cgraph); - stop_timer(&graph_compute_timer); + stop_timer(&graph_compute_timer); - return status; + return status; } static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) { - struct virtgpu *gpu = DEV_TO_GPU(backend->device); + struct virtgpu * gpu = DEV_TO_GPU(backend->device); #if true UNUSED(gpu); UNUSED(cgraph); @@ -41,40 +41,40 @@ static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cg } static ggml_backend_i ggml_backend_remoting_interface = { - /* .get_name = */ ggml_backend_remoting_get_name, - /* .free = */ ggml_backend_remoting_free, - /* .set_tensor_async = */ NULL, // ggml_backend_remoting_set_tensor_async, - /* .get_tensor_async = */ NULL, // ggml_backend_remoting_get_tensor_async, - /* .cpy_tensor_async = */ NULL, // ggml_backend_remoting_cpy_tensor_async, - /* .synchronize = */ NULL, // ggml_backend_remoting_synchronize, - /* .graph_plan_create = */ NULL, - /* .graph_plan_free = */ NULL, - 
/* .graph_plan_update = */ NULL, - /* .graph_plan_compute = */ NULL, - /* .graph_compute = */ ggml_backend_remoting_graph_compute, - /* .event_record = */ NULL, - /* .event_wait = */ NULL, - /* .graph_optimize = */ ggml_backend_remoting_graph_optimize, + /* .get_name = */ ggml_backend_remoting_get_name, + /* .free = */ ggml_backend_remoting_free, + /* .set_tensor_async = */ NULL, // ggml_backend_remoting_set_tensor_async, + /* .get_tensor_async = */ NULL, // ggml_backend_remoting_get_tensor_async, + /* .cpy_tensor_async = */ NULL, // ggml_backend_remoting_cpy_tensor_async, + /* .synchronize = */ NULL, // ggml_backend_remoting_synchronize, + /* .graph_plan_create = */ NULL, + /* .graph_plan_free = */ NULL, + /* .graph_plan_update = */ NULL, + /* .graph_plan_compute = */ NULL, + /* .graph_compute = */ ggml_backend_remoting_graph_compute, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, + /* .graph_optimize = */ ggml_backend_remoting_graph_optimize, }; static ggml_guid_t ggml_backend_remoting_guid() { - static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x14, 0x03, 0x86, 0x02, 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b }; + static ggml_guid guid = { 0xb8, 0xf7, 0x4f, 0x86, 0x14, 0x03, 0x86, 0x02, + 0x91, 0xc8, 0xdd, 0xe9, 0x02, 0x3f, 0xc0, 0x2b }; - return &guid; + return &guid; } - ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params) { - UNUSED(params); + UNUSED(params); - ggml_backend_remoting_device_context * ctx = (ggml_backend_remoting_device_context *)dev->context; + ggml_backend_remoting_device_context * ctx = (ggml_backend_remoting_device_context *) dev->context; - ggml_backend_t remoting_backend = new ggml_backend { - /* .guid = */ ggml_backend_remoting_guid(), - /* .interface = */ ggml_backend_remoting_interface, - /* .device = */ ggml_backend_reg_dev_get(ggml_backend_remoting_frontend_reg(), ctx->device), - /* .context = */ ctx, - }; + ggml_backend_t remoting_backend = new ggml_backend{ + /* .guid = */ 
ggml_backend_remoting_guid(), + /* .interface = */ ggml_backend_remoting_interface, + /* .device = */ ggml_backend_reg_dev_get(ggml_backend_remoting_frontend_reg(), ctx->device), + /* .context = */ ctx, + }; - return remoting_backend; + return remoting_backend; } diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp b/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp index 87679fe59a8..4ac7f9c2821 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp @@ -1,26 +1,22 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include "ggml-remoting-frontend.h" -#include "remoting.h" -#include "ggml-impl.h" #include "ggml-backend-impl.h" +#include "ggml-impl.h" +#include "remoting.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include int ggml_backend_remoting_get_device_count(); - - - struct remoting_device_struct { std::mutex mutex; }; diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-remotingfrontend/ggml-remoting.h index 9c5f14360d3..0683fec8424 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting.h +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting.h @@ -1,75 +1,68 @@ #pragma once -#include -#include - -#include "ggml-remoting-frontend.h" - -#include "ggml-impl.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" - +#include "ggml-impl.h" +#include "ggml-remoting-frontend.h" #include "virtgpu.h" +#include +#include + // USE_ALWAYS_TRUE_SUPPORTS_OP: 1 is fast, 0 avoid micro-benchmark crashes #define USE_ALWAYS_TRUE_SUPPORTS_OP 1 #define USE_METAL_GUEST_SUPPORTS_OP 0 -#define DEV_TO_GPU(name) \ - ((struct ggml_backend_remoting_device_context *) (name)->context)->gpu +#define DEV_TO_GPU(name) ((struct ggml_backend_remoting_device_context *) (name)->context)->gpu -#define BUFFER_TO_GGML_CONTEXT(name) \ - ((struct ggml_backend_remoting_buffer_context *) 
(name)->context) +#define BUFFER_TO_GGML_CONTEXT(name) ((struct ggml_backend_remoting_buffer_context *) (name)->context) -#define BUFFER_TO_APIR_CONTEXT(name) \ - &((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context +#define BUFFER_TO_APIR_CONTEXT(name) &((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context #define BUFFER_TO_HOST_HANDLE(name) \ - ((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle + ((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle #define GET_DEVICE_CONTEXT() \ - (struct ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context + (struct ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context -#define BUFT_TO_GPU(name) \ - ((struct ggml_backend_remoting_device_context *) (name)->device->context)->gpu +#define BUFT_TO_GPU(name) ((struct ggml_backend_remoting_device_context *) (name)->device->context)->gpu struct ggml_backend_remoting_device_context { - size_t device; - std::string name; - std::string description; + size_t device; + std::string name; + std::string description; - std::vector> shared_memory; + std::vector> shared_memory; - struct virtgpu *gpu; + struct virtgpu * gpu; }; struct ggml_backend_remoting_buffer_context { - apir_buffer_context_t apir_context; + apir_buffer_context_t apir_context; - struct virtgpu *gpu; + struct virtgpu * gpu; - void *base; + void * base; - bool is_from_ptr; + bool is_from_ptr; }; -extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface; +extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface; extern const struct ggml_backend_device_i ggml_backend_remoting_device_interface; -extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface; -extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface; -extern const 
ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface; +extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface; +extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface; +extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface; -ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device); -ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params); +ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device); +ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params); ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev); -static inline apir_buffer_type_host_handle_t -ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) { - // in the backend, the buffer handle is the buffer pointer - return (apir_buffer_type_host_handle_t) buft->context; +static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggml_backend_buffer_type_t buft) { + // in the backend, the buffer handle is the buffer pointer + return (apir_buffer_type_host_handle_t) buft->context; } static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { - return BUFFER_TO_HOST_HANDLE(buffer); + return BUFFER_TO_HOST_HANDLE(buffer); } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h index 230fa19f3a6..bad4b1e3ffb 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h @@ -1,17 +1,15 @@ -#include "ggml.h" -#include "ggml-impl.h" +#include "../ggml-remotingbackend/shared/apir_backend.h" #include "ggml-alloc.h" - +#include "ggml-impl.h" +#include "ggml.h" #include "virtgpu-shm.h" #include "virtgpu-utils.h" -#include "../ggml-remotingbackend/shared/apir_backend.h" - typedef struct { - apir_buffer_host_handle_t 
host_handle; + apir_buffer_host_handle_t host_handle; - struct virtgpu_shmem shmem; - apir_buffer_type_host_handle_t buft_host_handle; + struct virtgpu_shmem shmem; + apir_buffer_type_host_handle_t buft_host_handle; } apir_buffer_context_t; #include "virtgpu-forward.gen.h" diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp index 73c213641ac..2bcf91d3916 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp @@ -3,35 +3,34 @@ static long long current_time_ms() { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time - return (long long)ts.tv_sec * 1000000000LL + ts.tv_nsec; + return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; } -ggml_status -apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE); std::vector cgraph_data; - size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); + size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem *shmem = &temp_shmem; + struct virtgpu_shmem temp_shmem; // Local storage for large buffers + struct virtgpu_shmem * shmem = &temp_shmem; if (cgraph_size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough - shmem = &gpu->data_shmem; + // prefer the init-time allocated page, if large enough + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + 
FATAL("Couldn't allocate the guest-host shared buffer :/"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); apir_encode_size_t(encoder, &cgraph_size); - char *shmem_data = (char *) shmem->mmap_ptr; + char * shmem_data = (char *) shmem->mmap_ptr; struct apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size); apir_encode_cgraph_data(&secondary_enc, cgraph_data); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp index 9609e5e3149..2d5b2a1f592 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -1,131 +1,126 @@ #include "virtgpu-forward-impl.h" -const char * -apir_buffer_type_get_name(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - REMOTE_CALL(gpu, encoder, decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - const size_t string_size = apir_decode_array_size_unchecked(decoder); - char *string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); - if (!string) { - FATAL("%s: Could not allocate the device name buffer", __func__); - } - apir_decode_char_array(decoder, string, string_size); + const size_t string_size = apir_decode_array_size_unchecked(decoder); + char * string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + if (!string) { + FATAL("%s: Could not allocate the 
device name buffer", __func__); + } + apir_decode_char_array(decoder, string, string_size); + remote_call_finish(gpu, encoder, decoder); - remote_call_finish(gpu, encoder, decoder); - - return string; + return string; } -size_t -apir_buffer_type_get_alignment(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - REMOTE_CALL(gpu, encoder, decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - size_t alignment; - apir_decode_size_t(decoder, &alignment); + size_t alignment; + apir_decode_size_t(decoder, &alignment); - remote_call_finish(gpu, encoder, decoder); + remote_call_finish(gpu, encoder, decoder); - return alignment; + return alignment; } -size_t -apir_buffer_type_get_max_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - REMOTE_CALL(gpu, encoder, decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - size_t max_size; - apir_decode_size_t(decoder, &max_size); + size_t 
max_size; + apir_decode_size_t(decoder, &max_size); - remote_call_finish(gpu, encoder, decoder); + remote_call_finish(gpu, encoder, decoder); - return max_size; + return max_size; } -bool -apir_buffer_type_is_host(struct virtgpu *gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - REMOTE_CALL(gpu, encoder, decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - bool is_host; - apir_decode_bool_t(decoder, &is_host); + bool is_host; + apir_decode_bool_t(decoder, &is_host); - remote_call_finish(gpu, encoder, decoder); + remote_call_finish(gpu, encoder, decoder); - return is_host; + return is_host; } -apir_buffer_context_t -apir_buffer_type_alloc_buffer(struct virtgpu *gpu, ggml_backend_buffer_type_t buft, size_t size) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, + ggml_backend_buffer_type_t buft, + size_t size) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - apir_buffer_context_t buffer_context; + apir_buffer_context_t buffer_context; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_ALLOC_BUFFER); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - apir_encode_size_t(encoder, &size); + apir_encode_size_t(encoder, &size); - REMOTE_CALL(gpu, encoder, 
decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle); + apir_decode_apir_buffer_host_handle_t(decoder, &buffer_context.host_handle); - remote_call_finish(gpu, encoder, decoder); + remote_call_finish(gpu, encoder, decoder); - return buffer_context; + return buffer_context; } -size_t -apir_buffer_type_get_alloc_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft, const ggml_tensor *op) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; - ApirForwardReturnCode ret; +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; + ApirForwardReturnCode ret; - REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); + REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); - apir_encode_ggml_buffer_type(encoder, buft); + apir_encode_ggml_buffer_type(encoder, buft); - apir_encode_ggml_tensor_inline(encoder, op); + apir_encode_ggml_tensor_inline(encoder, op); - REMOTE_CALL(gpu, encoder, decoder, ret); + REMOTE_CALL(gpu, encoder, decoder, ret); - size_t alloc_size; - apir_decode_size_t(decoder, &alloc_size); + size_t alloc_size; + apir_decode_size_t(decoder, &alloc_size); - remote_call_finish(gpu, encoder, decoder); + remote_call_finish(gpu, encoder, decoder); - return alloc_size; + return alloc_size; } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp index 87b7bc897bd..e1d79331a14 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp @@ -1,9 +1,8 @@ #include "virtgpu-forward-impl.h" -void * -apir_buffer_get_base(struct virtgpu *gpu, apir_buffer_context_t *buffer_context) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void 
* apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE); @@ -20,11 +19,14 @@ apir_buffer_get_base(struct virtgpu *gpu, apir_buffer_context_t *buffer_context) return (void *) base; } -void -apir_buffer_set_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, - ggml_tensor *tensor, const void *data, size_t offset, size_t size) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_buffer_set_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + ggml_tensor * tensor, + const void * data, + size_t offset, + size_t size) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR); @@ -32,15 +34,15 @@ apir_buffer_set_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); apir_encode_ggml_tensor(encoder, tensor); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem *shmem = &temp_shmem; + struct virtgpu_shmem temp_shmem; // Local storage for large buffers + struct virtgpu_shmem * shmem = &temp_shmem; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough - shmem = &gpu->data_shmem; + // prefer the init-time allocated page, if large enough + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + FATAL("Couldn't allocate the guest-host shared buffer :/"); } memcpy(shmem->mmap_ptr, data, size); @@ -71,11 +73,14 @@ apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex memcpy(data, buffer_base_addr+offset, size); } #else -void 
-apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, - const ggml_tensor *tensor, void *data, size_t offset, size_t size) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_buffer_get_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + const ggml_tensor * tensor, + void * data, + size_t offset, + size_t size) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR); @@ -83,15 +88,15 @@ apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); apir_encode_ggml_tensor(encoder, tensor); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem *shmem = &temp_shmem; + struct virtgpu_shmem temp_shmem; // Local storage for large buffers + struct virtgpu_shmem * shmem = &temp_shmem; if (size <= gpu->data_shmem.mmap_size) { - // prefer the init-time allocated page, if large enough - shmem = &gpu->data_shmem; + // prefer the init-time allocated page, if large enough + shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + FATAL("Couldn't allocate the guest-host shared buffer :/"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); @@ -110,10 +115,12 @@ apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex } #endif -bool -apir_buffer_cpy_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *src, const ggml_tensor *dst) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +bool apir_buffer_cpy_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + const ggml_tensor * src, + const ggml_tensor * dst) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; 
ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR); @@ -132,11 +139,9 @@ apir_buffer_cpy_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex return ret_val; } -void -apir_buffer_clear(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, - uint8_t value) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR); @@ -149,11 +154,9 @@ apir_buffer_clear(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, remote_call_finish(gpu, encoder, decoder); } - -void -apir_buffer_free_buffer(struct virtgpu *gpu, apir_buffer_context_t *buffer_context) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp index 1b99128d735..f501657f851 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -1,15 +1,14 @@ #include "virtgpu-forward-impl.h" #include "virtgpu-shm.h" -int -apir_device_get_count(struct virtgpu *gpu) { +int apir_device_get_count(struct virtgpu * gpu) { static int32_t dev_count = -1; if (dev_count != -1) { return dev_count; } - struct apir_encoder *encoder; - struct apir_decoder *decoder; + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); @@ 
-22,21 +21,20 @@ apir_device_get_count(struct virtgpu *gpu) { return dev_count; } -const char * -apir_device_get_name(struct virtgpu *gpu) { - static char *string = nullptr; +const char * apir_device_get_name(struct virtgpu * gpu) { + static char * string = nullptr; if (string) { return string; } - struct apir_encoder *encoder; - struct apir_decoder *decoder; + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME); REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); if (!string) { FATAL("%s: Could not allocate the device name buffer", __func__); } @@ -47,10 +45,9 @@ apir_device_get_name(struct virtgpu *gpu) { return string; } -const char * -apir_device_get_description(struct virtgpu *gpu) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +const char * apir_device_get_description(struct virtgpu * gpu) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION); @@ -58,10 +55,10 @@ apir_device_get_description(struct virtgpu *gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - char *string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); if (!string) { FATAL("%s: Could not allocate the device description buffer", __func__); - return NULL; + return NULL; } apir_decode_char_array(decoder, string, string_size); @@ -70,15 +67,14 @@ apir_device_get_description(struct virtgpu *gpu) { return string; } -uint32_t -apir_device_get_type(struct virtgpu 
*gpu) { +uint32_t apir_device_get_type(struct virtgpu * gpu) { static uint32_t dev_type = 255; if (dev_type != 255) { return dev_type; } - struct apir_encoder *encoder; - struct apir_decoder *decoder; + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE); @@ -92,12 +88,11 @@ apir_device_get_type(struct virtgpu *gpu) { return dev_type; } -void -apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total) { - static size_t dev_free = 0; - static size_t dev_total = 0; - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total) { + static size_t dev_free = 0; + static size_t dev_total = 0; + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY); @@ -107,7 +102,7 @@ apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total) { apir_decode_size_t(decoder, &dev_free); apir_decode_size_t(decoder, &dev_total); - *free = dev_free; + *free = dev_free; *total = dev_total; remote_call_finish(gpu, encoder, decoder); @@ -115,10 +110,9 @@ apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total) { return; } -bool -apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP); @@ -135,10 +129,9 @@ apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op) { return supports_op; } -apir_buffer_type_host_handle_t -apir_device_get_buffer_type(struct virtgpu *gpu) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; 
+apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE); @@ -153,14 +146,13 @@ apir_device_get_buffer_type(struct virtgpu *gpu) { return buft_handle; } -void -apir_device_get_props(struct virtgpu *gpu, - bool *async, - bool *host_buffer, - bool *buffer_from_host_ptr, - bool *events) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +void apir_device_get_props(struct virtgpu * gpu, + bool * async, + bool * host_buffer, + bool * buffer_from_host_ptr, + bool * events) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS); @@ -177,12 +169,9 @@ apir_device_get_props(struct virtgpu *gpu, return; } -apir_buffer_context_t -apir_device_buffer_from_ptr(struct virtgpu *gpu, - size_t size, - size_t max_tensor_size) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirForwardReturnCode ret; apir_buffer_context_t buffer_context; diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h index 8f9a9695d48..237cb3890ec 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h @@ -1,27 +1,29 @@ -#include "ggml-backend-impl.h" -#include "ggml-remoting.h" #include "virtgpu.h" + +#include "ggml-remoting.h" #include "../ggml-remotingbackend/shared/apir_backend.h" #include "../ggml-remotingbackend/shared/apir_cs_ggml.h" -#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ - do { \ - int32_t forward_flag = (int32_t) 
apir_command_type__; \ - encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ - if (!encoder_name) { \ - FATAL("%s: failed to prepare the remote call encoder :/", __func__); \ - } \ - } while(0) +#include "ggml-backend-impl.h" +#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ + do { \ + int32_t forward_flag = (int32_t) apir_command_type__; \ + encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ + if (!encoder_name) { \ + FATAL("%s: failed to prepare the remote call encoder :/", __func__); \ + } \ + } while (0) -#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ - do { \ - ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ - if (!decoder_name) { \ - FATAL("%s: failed to kick the remote call :/", __func__); \ - } \ - if (ret_name < APIR_FORWARD_BASE_INDEX) { \ - FATAL("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), ret_name); \ - } \ - ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ - } while(0) +#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ + do { \ + ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ + if (!decoder_name) { \ + FATAL("%s: failed to kick the remote call :/", __func__); \ + } \ + if (ret_name < APIR_FORWARD_BASE_INDEX) { \ + FATAL("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), \ + ret_name); \ + } \ + ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ + } while (0) diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h index 7d0848ffdc4..c27c07f0865 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h @@ -1,32 +1,51 @@ 
#pragma once /* device */ -void apir_device_get_device_count(struct virtgpu *gpu); -int apir_device_get_count(struct virtgpu *gpu); -const char * apir_device_get_name(struct virtgpu *gpu); -const char * apir_device_get_description(struct virtgpu *gpu); -uint32_t apir_device_get_type(struct virtgpu *gpu); -void apir_device_get_memory(struct virtgpu *gpu, size_t *free, size_t *total); -bool apir_device_supports_op(struct virtgpu *gpu, const ggml_tensor *op); -apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu *gpu); -void apir_device_get_props(struct virtgpu *gpu, bool *async, bool *host_buffer, bool *buffer_from_host_ptr, bool *events); -apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu *gpu, size_t size, size_t max_tensor_size); +void apir_device_get_device_count(struct virtgpu * gpu); +int apir_device_get_count(struct virtgpu * gpu); +const char * apir_device_get_name(struct virtgpu * gpu); +const char * apir_device_get_description(struct virtgpu * gpu); +uint32_t apir_device_get_type(struct virtgpu * gpu); +void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); +bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op); +apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu); +void apir_device_get_props(struct virtgpu * gpu, + bool * async, + bool * host_buffer, + bool * buffer_from_host_ptr, + bool * events); +apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(struct virtgpu *gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(struct virtgpu *gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft); -bool apir_buffer_type_is_host(struct virtgpu *gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct 
virtgpu *gpu, ggml_backend_buffer_type_t buffer_buft, size_t size); -size_t apir_buffer_type_get_alloc_size(struct virtgpu *gpu, ggml_backend_buffer_type_t buft, const ggml_tensor *op); +const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, + ggml_backend_buffer_type_t buffer_buft, + size_t size); +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); /* buffer */ -void * apir_buffer_get_base(struct virtgpu *gpu, apir_buffer_context_t *buffer_context); -void apir_buffer_set_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, ggml_tensor *tensor, const void *data, size_t offset, size_t size); -void apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *tensor, void *data, size_t offset, size_t size); -bool apir_buffer_cpy_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *src, const ggml_tensor *dst); -void apir_buffer_clear(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, uint8_t value); -void apir_buffer_free_buffer(struct virtgpu *gpu, apir_buffer_context_t *buffer_context); +void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); +void apir_buffer_set_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + ggml_tensor * tensor, + const void * data, + size_t offset, + size_t size); +void apir_buffer_get_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + const ggml_tensor * tensor, + void * data, + size_t offset, + size_t size); +bool 
apir_buffer_cpy_tensor(struct virtgpu * gpu, + apir_buffer_context_t * buffer_context, + const ggml_tensor * src, + const ggml_tensor * dst); +void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value); +void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); /* backend */ -ggml_status apir_backend_graph_compute(struct virtgpu *gpu, ggml_cgraph *cgraph); +ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp index c921fc1813a..22d9b668002 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp @@ -1,105 +1,97 @@ -#include +#include "virtgpu-shm.h" #include "virtgpu.h" -#include "virtgpu-shm.h" -static uint32_t -virtgpu_ioctl_resource_create_blob(struct virtgpu *gpu, - uint32_t blob_mem, - uint32_t blob_flags, - size_t blob_size, - uint64_t blob_id, - uint32_t *res_id) -{ +#include + +static uint32_t virtgpu_ioctl_resource_create_blob(struct virtgpu * gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t * res_id) { #ifdef SIMULATE_BO_SIZE_FIX - blob_size = align64(blob_size, 4096); + blob_size = align64(blob_size, 4096); #endif - struct drm_virtgpu_resource_create_blob args = { - .blob_mem = blob_mem, - .blob_flags = blob_flags, - .bo_handle = 0, - .res_handle = 0, - .size = blob_size, - .pad = 0, - .cmd_size = 0, - .cmd = 0, - .blob_id = blob_id, - }; - - if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) - return 0; - - *res_id = args.res_handle; - return args.bo_handle; + struct drm_virtgpu_resource_create_blob args = { + .blob_mem = blob_mem, + .blob_flags = blob_flags, + .bo_handle = 0, + .res_handle = 0, + .size = blob_size, + .pad = 0, + .cmd_size = 0, + .cmd = 0, + .blob_id = blob_id, + }; + + if (virtgpu_ioctl(gpu, 
DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB, &args)) { + return 0; + } + + *res_id = args.res_handle; + return args.bo_handle; } -static void -virtgpu_ioctl_gem_close(struct virtgpu *gpu, uint32_t gem_handle) -{ - struct drm_gem_close args = { - .handle = gem_handle, - .pad = 0, - }; +static void virtgpu_ioctl_gem_close(struct virtgpu * gpu, uint32_t gem_handle) { + struct drm_gem_close args = { + .handle = gem_handle, + .pad = 0, + }; - const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args); - assert(!ret); + const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_GEM_CLOSE, &args); + assert(!ret); #ifdef NDEBUG - UNUSED(ret); + UNUSED(ret); #endif } -static void * -virtgpu_ioctl_map(struct virtgpu *gpu, uint32_t gem_handle, size_t size) -{ - struct drm_virtgpu_map args = { - .offset = 0, - .handle = gem_handle, - .pad = 0, - }; +static void * virtgpu_ioctl_map(struct virtgpu * gpu, uint32_t gem_handle, size_t size) { + struct drm_virtgpu_map args = { + .offset = 0, + .handle = gem_handle, + .pad = 0, + }; - if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) - return NULL; + if (virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_MAP, &args)) { + return NULL; + } - void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, - args.offset); - if (ptr == MAP_FAILED) - return NULL; + void * ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, gpu->fd, args.offset); + if (ptr == MAP_FAILED) { + return NULL; + } - return ptr; + return ptr; } -void -virtgpu_shmem_destroy(struct virtgpu *gpu, - struct virtgpu_shmem *shmem) -{ - munmap(shmem->mmap_ptr, shmem->mmap_size); - virtgpu_ioctl_gem_close(gpu, shmem->gem_handle); +void virtgpu_shmem_destroy(struct virtgpu * gpu, struct virtgpu_shmem * shmem) { + munmap(shmem->mmap_ptr, shmem->mmap_size); + virtgpu_ioctl_gem_close(gpu, shmem->gem_handle); } -int -virtgpu_shmem_create(struct virtgpu *gpu, size_t size, struct virtgpu_shmem *shmem) -{ - size = align64(size, 16384); +int virtgpu_shmem_create(struct virtgpu * gpu, 
size_t size, struct virtgpu_shmem * shmem) { + size = align64(size, 16384); - uint32_t res_id; - uint32_t gem_handle = virtgpu_ioctl_resource_create_blob( - gpu, VIRTGPU_BLOB_MEM_HOST3D, VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, - &res_id); + uint32_t res_id; + uint32_t gem_handle = virtgpu_ioctl_resource_create_blob(gpu, VIRTGPU_BLOB_MEM_HOST3D, + VIRTGPU_BLOB_FLAG_USE_MAPPABLE, size, 0, &res_id); - if (!gem_handle) - return 1; + if (!gem_handle) { + return 1; + } - void *ptr = virtgpu_ioctl_map(gpu, gem_handle, size); - if (!ptr) { - virtgpu_ioctl_gem_close(gpu, gem_handle); - return 1; - } + void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size); + if (!ptr) { + virtgpu_ioctl_gem_close(gpu, gem_handle); + return 1; + } - shmem->res_id = res_id; - shmem->mmap_size = size; - shmem->mmap_ptr = ptr; - shmem->gem_handle = gem_handle; + shmem->res_id = res_id; + shmem->mmap_size = size; + shmem->mmap_ptr = ptr; + shmem->gem_handle = gem_handle; - return 0; + return 0; } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h index bcd361217ad..bc890c6717c 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h @@ -1,22 +1,23 @@ #pragma once -#include -#include -#include -#include +#include "virtgpu-utils.h" + #include -#include "virtgpu-utils.h" +#include +#include +#include +#include struct virtgpu; struct virtgpu_shmem { uint32_t res_id; - size_t mmap_size; - void *mmap_ptr; + size_t mmap_size; + void * mmap_ptr; uint32_t gem_handle; }; -int virtgpu_shmem_create(struct virtgpu *gpu, size_t size, struct virtgpu_shmem *shmem); -void virtgpu_shmem_destroy(struct virtgpu *gpu, struct virtgpu_shmem *shmem); +int virtgpu_shmem_create(struct virtgpu * gpu, size_t size, struct virtgpu_shmem * shmem); +void virtgpu_shmem_destroy(struct virtgpu * gpu, struct virtgpu_shmem * shmem); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp 
b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp index 100f495add1..80046fe2688 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp @@ -1,186 +1,179 @@ #include "virtgpu-utils.h" + #include -#include #include +#include + #define NODE_ALLOC_ALIGN 64 -#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1)) -#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1) -#define NULL_NODE 0 +#define NODE_PTR_MASK (~((uintptr_t) NODE_ALLOC_ALIGN - 1)) +#define NODE_LEVEL_MASK ((uintptr_t) NODE_ALLOC_ALIGN - 1) +#define NULL_NODE 0 #define os_malloc_aligned(_size, _align) _aligned_malloc(_size, _align) -#define os_free_aligned(_ptr) free(_ptr) -#define p_atomic_cmpxchg(v, old, _new) \ - __sync_val_compare_and_swap((v), (old), (_new)) +#define os_free_aligned(_ptr) free(_ptr) +#define p_atomic_cmpxchg(v, old, _new) __sync_val_compare_and_swap((v), (old), (_new)) -static inline uint64_t -util_logbase2_64(uint64_t n) -{ +static inline uint64_t util_logbase2_64(uint64_t n) { #if defined(HAVE___BUILTIN_CLZLL) - return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1)); + return ((sizeof(uint64_t) * 8 - 1) - __builtin_clzll(n | 1)); #else - uint64_t pos = 0ull; - if (n >= 1ull<<32) { n >>= 32; pos += 32; } - if (n >= 1ull<<16) { n >>= 16; pos += 16; } - if (n >= 1ull<< 8) { n >>= 8; pos += 8; } - if (n >= 1ull<< 4) { n >>= 4; pos += 4; } - if (n >= 1ull<< 2) { n >>= 2; pos += 2; } - if (n >= 1ull<< 1) { pos += 1; } - return pos; + uint64_t pos = 0ull; + if (n >= 1ull << 32) { + n >>= 32; + pos += 32; + } + if (n >= 1ull << 16) { + n >>= 16; + pos += 16; + } + if (n >= 1ull << 8) { + n >>= 8; + pos += 8; + } + if (n >= 1ull << 4) { + n >>= 4; + pos += 4; + } + if (n >= 1ull << 2) { + n >>= 2; + pos += 2; + } + if (n >= 1ull << 1) { + pos += 1; + } + return pos; #endif } -void -util_sparse_array_init(struct util_sparse_array *arr, - size_t elem_size, size_t node_size) -{ - memset(arr, 0, sizeof(*arr)); - 
arr->elem_size = elem_size; - arr->node_size_log2 = util_logbase2_64(node_size); - assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2)); +void util_sparse_array_init(struct util_sparse_array * arr, size_t elem_size, size_t node_size) { + memset(arr, 0, sizeof(*arr)); + arr->elem_size = elem_size; + arr->node_size_log2 = util_logbase2_64(node_size); + assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2)); } -static inline void * -os_malloc_aligned(size_t size, size_t alignment) -{ - void *ptr; - alignment = (alignment + sizeof(void*) - 1) & ~(sizeof(void*) - 1); - if(posix_memalign(&ptr, alignment, size) != 0) - return NULL; - return ptr; +static inline void * os_malloc_aligned(size_t size, size_t alignment) { + void * ptr; + alignment = (alignment + sizeof(void *) - 1) & ~(sizeof(void *) - 1); + if (posix_memalign(&ptr, alignment, size) != 0) { + return NULL; + } + return ptr; } -static inline void * -_util_sparse_array_node_data(uintptr_t handle) -{ - return (void *)(handle & NODE_PTR_MASK); +static inline void * _util_sparse_array_node_data(uintptr_t handle) { + return (void *) (handle & NODE_PTR_MASK); } -static inline unsigned -_util_sparse_array_node_level(uintptr_t handle) -{ - return handle & NODE_LEVEL_MASK; +static inline unsigned _util_sparse_array_node_level(uintptr_t handle) { + return handle & NODE_LEVEL_MASK; } -static inline void -_util_sparse_array_node_finish(struct util_sparse_array *arr, - uintptr_t node) -{ - if (_util_sparse_array_node_level(node) > 0) { - uintptr_t *children = (uintptr_t *) _util_sparse_array_node_data(node); - size_t node_size = 1ull << arr->node_size_log2; - for (size_t i = 0; i < node_size; i++) { - if (children[i]) - _util_sparse_array_node_finish(arr, children[i]); - } - } - - os_free_aligned(_util_sparse_array_node_data(node)); +static inline void _util_sparse_array_node_finish(struct util_sparse_array * arr, uintptr_t node) { + if (_util_sparse_array_node_level(node) > 0) { + uintptr_t * 
children = (uintptr_t *) _util_sparse_array_node_data(node); + size_t node_size = 1ull << arr->node_size_log2; + for (size_t i = 0; i < node_size; i++) { + if (children[i]) { + _util_sparse_array_node_finish(arr, children[i]); + } + } + } + + os_free_aligned(_util_sparse_array_node_data(node)); } -static inline uintptr_t -_util_sparse_array_node(void *data, unsigned level) -{ - assert(data != NULL); - assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0); - assert((level & NODE_PTR_MASK) == 0); - return (uintptr_t)data | level; +static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) { + assert(data != NULL); + assert(((uintptr_t) data & NODE_LEVEL_MASK) == 0); + assert((level & NODE_PTR_MASK) == 0); + return (uintptr_t) data | level; } -inline uintptr_t -_util_sparse_array_node_alloc(struct util_sparse_array *arr, - unsigned level) -{ - size_t size; - if (level == 0) { - size = arr->elem_size << arr->node_size_log2; - } else { - size = sizeof(uintptr_t) << arr->node_size_log2; - } - - void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN); - memset(data, 0, size); - - return _util_sparse_array_node(data, level); +inline uintptr_t _util_sparse_array_node_alloc(struct util_sparse_array * arr, unsigned level) { + size_t size; + if (level == 0) { + size = arr->elem_size << arr->node_size_log2; + } else { + size = sizeof(uintptr_t) << arr->node_size_log2; + } + + void * data = os_malloc_aligned(size, NODE_ALLOC_ALIGN); + memset(data, 0, size); + + return _util_sparse_array_node(data, level); } -static inline uintptr_t -_util_sparse_array_set_or_free_node(uintptr_t *node_ptr, - uintptr_t cmp_node, - uintptr_t node) -{ - uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node); +static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr, uintptr_t cmp_node, uintptr_t node) { + uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node); - if (prev_node != cmp_node) { - /* We lost the race. 
Free this one and return the one that was already + if (prev_node != cmp_node) { + /* We lost the race. Free this one and return the one that was already * allocated. */ - os_free_aligned(_util_sparse_array_node_data(node)); - return prev_node; - } else { - return node; - } + os_free_aligned(_util_sparse_array_node_data(node)); + return prev_node; + } else { + return node; + } } -void * -util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx) -{ - const unsigned node_size_log2 = arr->node_size_log2; - uintptr_t root = p_atomic_read(&arr->root); - if (unlikely(!root)) { - unsigned root_level = 0; - uint64_t idx_iter = idx >> node_size_log2; - while (idx_iter) { - idx_iter >>= node_size_log2; - root_level++; - } - uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level); - root = _util_sparse_array_set_or_free_node(&arr->root, - NULL_NODE, new_root); - } - - while (1) { - unsigned root_level = _util_sparse_array_node_level(root); - uint64_t root_idx = idx >> (root_level * node_size_log2); - if (likely(root_idx < (1ull << node_size_log2))) - break; - - /* In this case, we have a root but its level is low enough that the +void * util_sparse_array_get(struct util_sparse_array * arr, uint64_t idx) { + const unsigned node_size_log2 = arr->node_size_log2; + uintptr_t root = p_atomic_read(&arr->root); + if (unlikely(!root)) { + unsigned root_level = 0; + uint64_t idx_iter = idx >> node_size_log2; + while (idx_iter) { + idx_iter >>= node_size_log2; + root_level++; + } + uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level); + root = _util_sparse_array_set_or_free_node(&arr->root, NULL_NODE, new_root); + } + + while (1) { + unsigned root_level = _util_sparse_array_node_level(root); + uint64_t root_idx = idx >> (root_level * node_size_log2); + if (likely(root_idx < (1ull << node_size_log2))) { + break; + } + + /* In this case, we have a root but its level is low enough that the * requested index is out-of-bounds. 
*/ - uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1); + uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1); - uintptr_t *new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root); - new_root_children[0] = root; + uintptr_t * new_root_children = (uintptr_t *) _util_sparse_array_node_data(new_root); + new_root_children[0] = root; - /* We only add one at a time instead of the whole tree because it's + /* We only add one at a time instead of the whole tree because it's * easier to ensure correctness of both the tree building and the * clean-up path. Because we're only adding one node we never have to * worry about trying to free multiple things without freeing the old * things. */ - root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root); - } - - void *node_data = _util_sparse_array_node_data(root); - unsigned node_level = _util_sparse_array_node_level(root); - while (node_level > 0) { - uint64_t child_idx = (idx >> (node_level * node_size_log2)) & - ((1ull << node_size_log2) - 1); - - uintptr_t *children = (uintptr_t *) node_data; - uintptr_t child = p_atomic_read(&children[child_idx]); - - if (unlikely(!child)) { - child = _util_sparse_array_node_alloc(arr, node_level - 1); - child = _util_sparse_array_set_or_free_node(&children[child_idx], - NULL_NODE, child); - } - - node_data = _util_sparse_array_node_data(child); - node_level = _util_sparse_array_node_level(child); - } - - uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1); - return (void *)((char *)node_data + (elem_idx * arr->elem_size)); + root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root); + } + + void * node_data = _util_sparse_array_node_data(root); + unsigned node_level = _util_sparse_array_node_level(root); + while (node_level > 0) { + uint64_t child_idx = (idx >> (node_level * node_size_log2)) & ((1ull << node_size_log2) - 1); + + uintptr_t * children = (uintptr_t *) node_data; + uintptr_t child = 
p_atomic_read(&children[child_idx]); + + if (unlikely(!child)) { + child = _util_sparse_array_node_alloc(arr, node_level - 1); + child = _util_sparse_array_set_or_free_node(&children[child_idx], NULL_NODE, child); + } + + node_data = _util_sparse_array_node_data(child); + node_level = _util_sparse_array_node_level(child); + } + + uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1); + return (void *) ((char *) node_data + (elem_idx * arr->elem_size)); } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h index dd911a63b59..c2bcd9589f6 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h @@ -1,20 +1,20 @@ #pragma once -#include +#include #include -#include -#include #include -#include -#include #include +#include +#include #include +#include +#include #define unlikely(x) __builtin_expect(!!(x), 0) -#define likely(x) __builtin_expect(!!(x), 1) +#define likely(x) __builtin_expect(!!(x), 1) #ifndef UNUSED -#define UNUSED(x) (void)(x) +# define UNUSED(x) (void) (x) #endif /** Checks is a value is a power of two. Does not handle zero. */ @@ -32,102 +32,89 @@ void thks_bye(); void breakpoint(); #ifndef NDEBUG -inline void -INFO(const char *format, ...) { - fprintf(stderr, "INFO: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); +inline void INFO(const char * format, ...) { + fprintf(stderr, "INFO: "); + + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); } #else -inline void -INFO(...) {} +inline void INFO(...) {} #endif -inline void -MESSAGE(const char *format, ...) { - fprintf(stderr, "APIR: "); +inline void MESSAGE(const char * format, ...) 
{ + fprintf(stderr, "APIR: "); - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); } -inline void -WARNING(const char *format, ...) { - fprintf(stderr, "WARNING: "); +inline void WARNING(const char * format, ...) { + fprintf(stderr, "WARNING: "); - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); } -inline void -ERROR(const char *format, ...) { - fprintf(stderr, "ERROR: "); +inline void ERROR(const char * format, ...) { + fprintf(stderr, "ERROR: "); - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); } -inline void -FATAL(const char *format, ...) { - fprintf(stderr, "FATAL: "); +inline void FATAL(const char * format, ...) 
{ + fprintf(stderr, "FATAL: "); - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); + va_list argptr; + va_start(argptr, format); + vfprintf(stderr, format, argptr); + fprintf(stderr, "\n"); + va_end(argptr); - abort(); + abort(); } -static inline bool -util_is_power_of_two_nonzero64(uint64_t v) -{ - return IS_POT_NONZERO(v); +static inline bool util_is_power_of_two_nonzero64(uint64_t v) { + return IS_POT_NONZERO(v); } -static inline uint64_t -align64(uint64_t value, uint64_t alignment) -{ - assert(util_is_power_of_two_nonzero64(alignment)); - return ALIGN_POT(value, alignment); +static inline uint64_t align64(uint64_t value, uint64_t alignment) { + assert(util_is_power_of_two_nonzero64(alignment)); + return ALIGN_POT(value, alignment); } -struct list_head -{ - struct list_head *prev; - struct list_head *next; +struct list_head { + struct list_head * prev; + struct list_head * next; }; struct util_sparse_array { - size_t elem_size; - unsigned node_size_log2; + size_t elem_size; + unsigned node_size_log2; - uintptr_t root; + uintptr_t root; }; -void *util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx); -void util_sparse_array_init(struct util_sparse_array *arr, - size_t elem_size, size_t node_size); - -inline void -os_time_sleep(int64_t usecs) -{ - struct timespec time; - time.tv_sec = usecs / 1000000; - time.tv_nsec = (usecs % 1000000) * 1000; - while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR); +void * util_sparse_array_get(struct util_sparse_array * arr, uint64_t idx); +void util_sparse_array_init(struct util_sparse_array * arr, size_t elem_size, size_t node_size); + +inline void os_time_sleep(int64_t usecs) { + struct timespec time; + time.tv_sec = usecs / 1000000; + time.tv_nsec = (usecs % 1000000) * 1000; + while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR) + ; } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp 
b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index cc418c89306..f60b00a582d 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -1,43 +1,39 @@ +#include "virtgpu.h" + #include -#include -#include #include +#include +#include #include -#include "virtgpu.h" - -static virt_gpu_result_t virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev); -static virt_gpu_result_t virtgpu_open(struct virtgpu *gpu); - +static virt_gpu_result_t virtgpu_open_device(struct virtgpu * gpu, const drmDevicePtr dev); +static virt_gpu_result_t virtgpu_open(struct virtgpu * gpu); -static virt_gpu_result_t virtgpu_init_capset(struct virtgpu *gpu); -static virt_gpu_result_t virtgpu_init_context(struct virtgpu *gpu); +static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu); +static virt_gpu_result_t virtgpu_init_context(struct virtgpu * gpu); -static int virtgpu_ioctl_context_init(struct virtgpu *gpu, - enum virgl_renderer_capset capset_id); -static int -virtgpu_ioctl_get_caps(struct virtgpu *gpu, - enum virgl_renderer_capset id, - uint32_t version, - void *capset, - size_t capset_size); -static uint64_t virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param); -static void virtgpu_init_renderer_info(struct virtgpu *gpu); +static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_capset capset_id); +static int virtgpu_ioctl_get_caps(struct virtgpu * gpu, + enum virgl_renderer_capset id, + uint32_t version, + void * capset, + size_t capset_size); +static uint64_t virtgpu_ioctl_getparam(struct virtgpu * gpu, uint64_t param); +static void virtgpu_init_renderer_info(struct virtgpu * gpu); -struct timer_data wait_host_reply_timer = {0, 0, 0, "wait_host_reply"}; +struct timer_data wait_host_reply_timer = { 0, 0, 0, "wait_host_reply" }; -static void log_call_duration(long long call_duration_ns, const char *name); +static void log_call_duration(long long call_duration_ns, const char * name); -const 
uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2*1000; // 2s -const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60*1000; // 60s +const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2 * 1000; // 2s +const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60 * 1000; // 60s -static int -virtgpu_handshake(struct virtgpu *gpu) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +static int virtgpu_handshake(struct virtgpu * gpu) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; - encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); if (!encoder) { FATAL("%s: failed to prepare the remote call encoder :/", __func__); return 1; @@ -52,14 +48,16 @@ virtgpu_handshake(struct virtgpu *gpu) { /* *** */ - uint32_t ret_magic; + uint32_t ret_magic; long long call_duration_ns; ret_magic = remote_call(gpu, encoder, &decoder, APIR_HANDSHAKE_MAX_WAIT_MS, &call_duration_ns); log_call_duration(call_duration_ns, "API Remoting handshake"); if (!decoder) { - FATAL("%s: failed to initiate the communication with the virglrenderer library. " - "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", __func__); + FATAL( + "%s: failed to initiate the communication with the virglrenderer library. 
" + "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", + __func__); return 1; } @@ -69,8 +67,8 @@ virtgpu_handshake(struct virtgpu *gpu) { uint32_t host_minor; if (ret_magic != APIR_HANDSHAKE_MAGIC) { - FATAL("%s: handshake with the virglrenderer failed (code=%d | %s):/", - __func__, ret_magic, apir_backend_initialize_error(ret_magic)); + FATAL("%s: handshake with the virglrenderer failed (code=%d | %s):/", __func__, ret_magic, + apir_backend_initialize_error(ret_magic)); } else { apir_decode_uint32_t(decoder, &host_major); apir_decode_uint32_t(decoder, &host_minor); @@ -94,13 +92,12 @@ virtgpu_handshake(struct virtgpu *gpu) { return 0; } -static ApirLoadLibraryReturnCode -virtgpu_load_library(struct virtgpu *gpu) { - struct apir_encoder *encoder; - struct apir_decoder *decoder; +static ApirLoadLibraryReturnCode virtgpu_load_library(struct virtgpu * gpu) { + struct apir_encoder * encoder; + struct apir_decoder * decoder; ApirLoadLibraryReturnCode ret; - encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); if (!encoder) { FATAL("%s: hypercall error: failed to prepare the remote call encoder :/", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; @@ -108,8 +105,8 @@ virtgpu_load_library(struct virtgpu *gpu) { long long call_duration_ns; - ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, - APIR_LOADLIBRARY_MAX_WAIT_MS, &call_duration_ns); + ret = (ApirLoadLibraryReturnCode) remote_call(gpu, encoder, &decoder, APIR_LOADLIBRARY_MAX_WAIT_MS, + &call_duration_ns); log_call_duration(call_duration_ns, "API Remoting LoadLibrary"); if (!decoder) { @@ -128,8 +125,8 @@ virtgpu_load_library(struct virtgpu *gpu) { // something wrong happened, find out what. 
if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - FATAL("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", - __func__, apir_load_library_error(ret), ret); + FATAL("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__, + apir_load_library_error(ret), ret); return ret; } @@ -138,19 +135,18 @@ virtgpu_load_library(struct virtgpu *gpu) { ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - FATAL("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", - __func__, apir_ret, apir_load_library_error(apir_ret)); + FATAL("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", __func__, + apir_ret, apir_load_library_error(apir_ret)); } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; - FATAL("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", - __func__, lib_ret); + FATAL("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", __func__, + lib_ret); } return ret; } -struct virtgpu * -create_virtgpu() { - struct virtgpu *gpu = new struct virtgpu(); +struct virtgpu * create_virtgpu() { + struct virtgpu * gpu = new struct virtgpu(); gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; @@ -162,13 +158,13 @@ create_virtgpu() { } if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { - FATAL("%s: failed to initialize the GPU capset :/", __func__); - return NULL; + FATAL("%s: failed to initialize the GPU capset :/", __func__); + return NULL; } if (virtgpu_init_context(gpu) != APIR_SUCCESS) { - FATAL("%s: failed to initialize the GPU context :/", __func__); - return NULL; + FATAL("%s: failed to initialize the GPU context :/", __func__); + return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) { @@ 
-194,11 +190,9 @@ create_virtgpu() { return gpu; } -static virt_gpu_result_t -virtgpu_open(struct virtgpu *gpu) -{ +static virt_gpu_result_t virtgpu_open(struct virtgpu * gpu) { drmDevicePtr devs[8]; - int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); + int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); if (count < 0) { ERROR("%s: failed to enumerate DRM devices", __func__); return APIR_ERROR_INITIALIZATION_FAILED; @@ -207,8 +201,9 @@ virtgpu_open(struct virtgpu *gpu) virt_gpu_result_t result = APIR_ERROR_INITIALIZATION_FAILED; for (int i = 0; i < count; i++) { result = virtgpu_open_device(gpu, devs[i]); - if (result == APIR_SUCCESS) + if (result == APIR_SUCCESS) { break; + } } drmFreeDevices(devs, count); @@ -216,10 +211,8 @@ virtgpu_open(struct virtgpu *gpu) return result; } -static virt_gpu_result_t -virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev) -{ - const char *node_path = dev->nodes[DRM_NODE_RENDER]; +static virt_gpu_result_t virtgpu_open_device(struct virtgpu * gpu, const drmDevicePtr dev) { + const char * node_path = dev->nodes[DRM_NODE_RENDER]; int fd = open(node_path, O_RDWR | O_CLOEXEC); if (fd < 0) { @@ -228,17 +221,16 @@ virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev) } drmVersionPtr version = drmGetVersion(fd); - if (!version || strcmp(version->name, "virtio_gpu") || - version->version_major != 0) { + if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) { if (version) { - MESSAGE("unknown DRM driver %s version %d", - version->name, version->version_major); + MESSAGE("unknown DRM driver %s version %d", version->name, version->version_major); } else { MESSAGE("failed to get DRM driver version"); } - if (version) + if (version) { drmFreeVersion(version); + } close(fd); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -252,9 +244,7 @@ virtgpu_open_device(struct virtgpu *gpu, const drmDevicePtr dev) return APIR_SUCCESS; } -static virt_gpu_result_t -virtgpu_init_context(struct virtgpu 
*gpu) -{ +static virt_gpu_result_t virtgpu_init_context(struct virtgpu * gpu) { assert(!gpu->capset.version); const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id); if (ret) { @@ -265,21 +255,18 @@ virtgpu_init_context(struct virtgpu *gpu) return APIR_SUCCESS; } -static virt_gpu_result_t -virtgpu_init_capset(struct virtgpu *gpu) -{ +static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu) { if (gpu->use_apir_capset) { - MESSAGE("Using the APIR capset"); - gpu->capset.id = VIRGL_RENDERER_CAPSET_APIR; + MESSAGE("Using the APIR capset"); + gpu->capset.id = VIRGL_RENDERER_CAPSET_APIR; } else { - MESSAGE("Using the Venus capset"); - gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; + MESSAGE("Using the Venus capset"); + gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; } gpu->capset.version = 0; - int ret = \ - virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, - &gpu->capset.data, sizeof(gpu->capset.data)); + int ret = + virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data)); if (ret) { MESSAGE("failed to get APIR v%d capset: %s", gpu->capset.version, strerror(errno)); @@ -291,73 +278,60 @@ virtgpu_init_capset(struct virtgpu *gpu) return APIR_SUCCESS; } -static int -virtgpu_ioctl_context_init(struct virtgpu *gpu, - enum virgl_renderer_capset capset_id) -{ +static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_capset capset_id) { struct drm_virtgpu_context_set_param ctx_set_params[3] = { { - .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, - .value = capset_id, - }, + .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, + .value = capset_id, + }, { - .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS, - .value = 1, - }, + .param = VIRTGPU_CONTEXT_PARAM_NUM_RINGS, + .value = 1, + }, { - .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK, - .value = 0, /* don't generate drm_events on fence signaling */ + .param = VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK, + .value = 0, /* don't generate drm_events on 
fence signaling */ }, }; struct drm_virtgpu_context_init args = { - .num_params = ARRAY_SIZE(ctx_set_params), - .pad = 0, - .ctx_set_params = (uintptr_t)&ctx_set_params, + .num_params = ARRAY_SIZE(ctx_set_params), + .pad = 0, + .ctx_set_params = (uintptr_t) &ctx_set_params, }; return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args); } -static int -virtgpu_ioctl_get_caps(struct virtgpu *gpu, - enum virgl_renderer_capset id, - uint32_t version, - void *capset, - size_t capset_size) -{ +static int virtgpu_ioctl_get_caps(struct virtgpu * gpu, + enum virgl_renderer_capset id, + uint32_t version, + void * capset, + size_t capset_size) { struct drm_virtgpu_get_caps args = { - .cap_set_id = id, + .cap_set_id = id, .cap_set_ver = version, - .addr = (uintptr_t)capset, - .size = (__u32) capset_size, - .pad = 0, + .addr = (uintptr_t) capset, + .size = (__u32) capset_size, + .pad = 0, }; return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args); } -static uint64_t -virtgpu_ioctl_getparam(struct virtgpu *gpu, uint64_t param) -{ +static uint64_t virtgpu_ioctl_getparam(struct virtgpu * gpu, uint64_t param) { /* val must be zeroed because kernel only writes the lower 32 bits */ - uint64_t val = 0; + uint64_t val = 0; struct drm_virtgpu_getparam args = { .param = param, - .value = (uintptr_t)&val, + .value = (uintptr_t) &val, }; const int ret = virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GETPARAM, &args); return ret ? 
0 : val; } - -struct apir_encoder * -remote_call_prepare( - struct virtgpu *gpu, - ApirCommandType apir_cmd_type, - int32_t cmd_flags) -{ +struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) { /* * Prepare the command encoder and its buffer */ @@ -382,7 +356,7 @@ remote_call_prepare( // for testing during the hypervisor transition if (!gpu->use_apir_capset) { - cmd_type += VENUS_COMMAND_TYPE_LENGTH; + cmd_type += VENUS_COMMAND_TYPE_LENGTH; } apir_encode_int32_t(&enc, &cmd_type); apir_encode_int32_t(&enc, &cmd_flags); @@ -393,11 +367,7 @@ remote_call_prepare( return &enc; } -void -remote_call_finish( - struct virtgpu *gpu, - struct apir_encoder *enc, - struct apir_decoder *dec) { +void remote_call_finish(struct virtgpu * gpu, struct apir_encoder * enc, struct apir_decoder * dec) { UNUSED(gpu); if (!enc) { @@ -411,40 +381,37 @@ remote_call_finish( // encoder and decoder are statically allocated, nothing to do to release them } -uint32_t -remote_call( - struct virtgpu *gpu, - struct apir_encoder *encoder, - struct apir_decoder **decoder, - float max_wait_ms, - long long *call_duration_ns) -{ +uint32_t remote_call(struct virtgpu * gpu, + struct apir_encoder * encoder, + struct apir_decoder ** decoder, + float max_wait_ms, + long long * call_duration_ns) { /* * Prepare the reply notification pointer */ - volatile std::atomic_uint *atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr; - *atomic_reply_notif = 0; + volatile std::atomic_uint * atomic_reply_notif = (volatile std::atomic_uint *) gpu->reply_shmem.mmap_ptr; + *atomic_reply_notif = 0; /* * Trigger the execbuf ioctl */ struct drm_virtgpu_execbuffer args = { - .flags = VIRTGPU_EXECBUF_RING_IDX, - .size = (uint32_t) (encoder->cur - encoder->start), + .flags = VIRTGPU_EXECBUF_RING_IDX, + .size = (uint32_t) (encoder->cur - encoder->start), .command = (uintptr_t) encoder->start, - .bo_handles = 0, + .bo_handles = 0, .num_bo_handles = 
0, - .fence_fd = 0, - .ring_idx = 0, - .syncobj_stride = 0, - .num_in_syncobjs = 0, + .fence_fd = 0, + .ring_idx = 0, + .syncobj_stride = 0, + .num_in_syncobjs = 0, .num_out_syncobjs = 0, - .in_syncobjs = 0, - .out_syncobjs = 0, + .in_syncobjs = 0, + .out_syncobjs = 0, }; *decoder = NULL; @@ -463,9 +430,9 @@ remote_call( struct timespec ts_start, ts_end; clock_gettime(CLOCK_MONOTONIC, &ts_start); - long long start_time = (long long)ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec; + long long start_time = (long long) ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec; - bool timedout = false; + bool timedout = false; uint32_t notif_value = 0; while (true) { notif_value = std::atomic_load_explicit(atomic_reply_notif, std::memory_order_acquire); @@ -480,8 +447,8 @@ remote_call( if (max_wait_ms) { clock_gettime(CLOCK_MONOTONIC, &ts_end); - long long end_time = (long long)ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec; - float duration_ms = (end_time - start_time) / 1000000; + long long end_time = (long long) ts_end.tv_sec * 1000000000LL + ts_end.tv_nsec; + float duration_ms = (end_time - start_time) / 1000000; if (duration_ms > max_wait_ms) { timedout = true; @@ -505,14 +472,14 @@ remote_call( static struct apir_decoder response_dec; response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif); response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size; - *decoder = &response_dec; + *decoder = &response_dec; // extract the actual return value from the notif flag uint32_t returned_value = notif_value - 1; return returned_value; } -static void log_call_duration(long long call_duration_ns, const char *name) { +static void log_call_duration(long long call_duration_ns, const char * name) { double call_duration_ms = (double) call_duration_ns / 1e6; // 1 millisecond = 1e6 nanoseconds double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h 
b/ggml/src/ggml-remotingfrontend/virtgpu.h index 1f445852cd0..c8f432eaefe 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu.h @@ -1,69 +1,70 @@ #pragma once -#include +#include "virtgpu-utils.h" +#include "virtgpu-shm.h" +#include "virtgpu-apir.h" + +#include "../ggml-remotingbackend/shared/api_remoting.h" +#include "../ggml-remotingbackend/shared/apir_cs.h" + #include -#include #include -#include -#include +#include #include #include +#include +#include -#include "virtgpu-apir.h" -#include "virtgpu-utils.h" -#include "../ggml-remotingbackend/shared/api_remoting.h" -#include "../ggml-remotingbackend/shared/apir_cs.h" - -#include "virtgpu-shm.h" +#include #define VIRGL_RENDERER_UNSTABLE_APIS 1 -#include "drm-uapi/virtgpu_drm.h" #include "apir_hw.h" +#include "drm-uapi/virtgpu_drm.h" #include "venus_hw.h" // must match https://gitlab.freedesktop.org/kpouget/virglrenderer/-/blob/main/src/virglrenderer_hw.h?ref_type=heads enum virgl_renderer_capset { - VIRGL_RENDERER_CAPSET_VIRGL = 1, - VIRGL_RENDERER_CAPSET_VIRGL2 = 2, + VIRGL_RENDERER_CAPSET_VIRGL = 1, + VIRGL_RENDERER_CAPSET_VIRGL2 = 2, /* 3 is reserved for gfxstream */ - VIRGL_RENDERER_CAPSET_VENUS = 4, + VIRGL_RENDERER_CAPSET_VENUS = 4, /* 5 is reserved for cross-domain */ - VIRGL_RENDERER_CAPSET_DRM = 6, + VIRGL_RENDERER_CAPSET_DRM = 6, - VIRGL_RENDERER_CAPSET_APIR = 10, + VIRGL_RENDERER_CAPSET_APIR = 10, }; #define VENUS_COMMAND_TYPE_LENGTH 331 /* from src/virtio/vulkan/vn_renderer_virtgpu.c */ -#define VIRTGPU_PCI_VENDOR_ID 0x1af4 -#define VIRTGPU_PCI_DEVICE_ID 0x1050 +#define VIRTGPU_PCI_VENDOR_ID 0x1af4 +#define VIRTGPU_PCI_DEVICE_ID 0x1050 #define VIRTGPU_BLOB_MEM_GUEST_VRAM 0x0004 -#define VIRTGPU_PARAM_GUEST_VRAM 9 +#define VIRTGPU_PARAM_GUEST_VRAM 9 -#define SHMEM_DATA_SIZE 0x1830000 // 24MiB +#define SHMEM_DATA_SIZE 0x1830000 // 24MiB #define SHMEM_REPLY_SIZE 0x4000 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) typedef enum virt_gpu_result_t { - 
APIR_SUCCESS = 0, + APIR_SUCCESS = 0, APIR_ERROR_INITIALIZATION_FAILED = -1, } virt_gpu_result_t; -#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#define PRINTFLIKE(f, a) __attribute__((format(__printf__, f, a))) struct virtgpu { - struct remoting_dev_instance *instance; + struct remoting_dev_instance * instance; bool use_apir_capset; int fd; struct { - enum virgl_renderer_capset id; - uint32_t version; - struct virgl_renderer_capset_apir data; + enum virgl_renderer_capset id; + uint32_t version; + struct virgl_renderer_capset_apir data; } capset; struct util_sparse_array shmem_array; @@ -73,29 +74,18 @@ struct virtgpu { struct virtgpu_shmem data_shmem; }; - -static inline int -virtgpu_ioctl(struct virtgpu *gpu, unsigned long request, void *args) -{ +static inline int virtgpu_ioctl(struct virtgpu * gpu, unsigned long request, void * args) { return drmIoctl(gpu->fd, request, args); } -struct virtgpu *create_virtgpu(); - -struct apir_encoder *remote_call_prepare( - struct virtgpu *gpu, - ApirCommandType apir_cmd_type, - int32_t cmd_flags); - -uint32_t remote_call( - struct virtgpu *gpu, - struct apir_encoder *enc, - struct apir_decoder **dec, - float max_wait_ms, - long long *call_duration_ns - ); - -void remote_call_finish( - struct virtgpu *gpu, - struct apir_encoder *enc, - struct apir_decoder *dec); +struct virtgpu * create_virtgpu(); + +struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags); + +uint32_t remote_call(struct virtgpu * gpu, + struct apir_encoder * enc, + struct apir_decoder ** dec, + float max_wait_ms, + long long * call_duration_ns); + +void remote_call_finish(struct virtgpu * gpu, struct apir_encoder * enc, struct apir_decoder * dec); From a37f3792ab677ceb1e82e56f07289f9d97f7cddd Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 10:50:12 +0100 Subject: [PATCH 08/37] Remove FATAL errors in the backend --- .../backend-dispatched-backend.cpp | 4 +- 
.../backend-dispatched-buffer.cpp | 19 ++---- .../backend-dispatched-device.cpp | 4 +- .../backend-dispatched.cpp | 6 +- ggml/src/ggml-remotingbackend/backend-utils.h | 5 -- ggml/src/ggml-remotingbackend/backend.cpp | 7 +- .../shared/apir_backend.h | 4 +- .../src/ggml-remotingbackend/shared/apir_cs.h | 66 ++++++++++++++----- .../shared/apir_cs_ggml.h | 3 +- .../virtgpu-forward-buffer-type.cpp | 5 +- .../virtgpu-forward-device.cpp | 11 ++-- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 15 +++-- 12 files changed, 90 insertions(+), 59 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index 22d2920e110..8b1d698a719 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -33,7 +33,9 @@ uint32_t backend_backend_graph_compute(struct apir_encoder * enc, const void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - FATAL("Couldn't get the shmem addr from virgl :/"); + ERROR("Couldn't get the shmem addr from virgl"); + apir_decoder_set_fatal(dec); + return 1; } size_t cgraph_size; apir_decode_size_t(dec, &cgraph_size); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index a04ef0f1c55..40605180a93 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -50,22 +50,10 @@ uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - FATAL("Couldn't get the shmem addr from virgl :/"); + ERROR("Couldn't get the shmem addr from virgl :/"); + return 1; } -#if 0 - INFO("Calling (%p)->set_tensor(tensor=%p, data=%p, offset=%lu, size=%lu", - buffer, tensor, shmem_data, offset, size); -#endif -#if 0 
- void **addr = (void **)(uintptr_t) shmem_data; - for (int i = 0; i <= 10; i++) { - INFO("%s: %p | %llx", __func__, addr, *addr); - addr++; - } - INFO("\n"); -#endif - buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size); stop_timer(&set_tensor_timer); @@ -99,7 +87,8 @@ uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - FATAL("Couldn't get the shmem addr from virgl :/"); + ERROR("Couldn't get the shmem addr from virgl :/"); + return 1; } buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index 03ff8ad0558..bdeee123e81 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -144,7 +144,9 @@ uint32_t backend_device_buffer_from_ptr(struct apir_encoder * enc, void * shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_ptr) { - FATAL("Couldn't get the shmem addr from virgl :/"); + ERROR("Couldn't get the shmem addr from virgl"); + apir_decoder_set_fatal(dec); + return 1; } size_t size; diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index b75a5d2ad7b..7800f7e814b 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -17,13 +17,15 @@ long long timer_count = 0; uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { if (reg != NULL) { - FATAL("%s: already initialized :/", __func__); + WARNING("%s: already initialized :/", __func__); + return APIR_BACKEND_INITIALIZE_ALREADY_INITED; } ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; reg = 
ggml_backend_reg_fct(); if (reg == NULL) { - FATAL("%s: backend registration failed :/", __func__); + ERROR("%s: backend registration failed :/", __func__); + return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } if (reg->iface.get_device_count(reg)) { diff --git a/ggml/src/ggml-remotingbackend/backend-utils.h b/ggml/src/ggml-remotingbackend/backend-utils.h index 95315e09937..bb1a5a57bc1 100644 --- a/ggml/src/ggml-remotingbackend/backend-utils.h +++ b/ggml/src/ggml-remotingbackend/backend-utils.h @@ -49,8 +49,3 @@ inline void WARNING(const char * format, ...) { inline void ERROR(const char * format, ...) { APIR_VA_PRINT("ERROR: ", format); } - -[[noreturn]] inline void FATAL(const char * format, ...) { - APIR_VA_PRINT("FATAL: ", format); - abort(); -} diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index a9334d48073..91d8859e923 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -112,12 +112,14 @@ uint32_t apir_backend_dispatcher(uint32_t cmd_type, .cur = enc_cur, .start = enc_cur, .end = enc_end, + .fatal = false, }; struct apir_encoder * enc = &_enc; struct apir_decoder _dec = { .cur = dec_cur, .end = dec_end, + .fatal = false, }; struct apir_decoder * dec = &_dec; @@ -126,11 +128,6 @@ uint32_t apir_backend_dispatcher(uint32_t cmd_type, return APIR_BACKEND_FORWARD_INDEX_INVALID; } -#if 0 - static long long count = 0; - INFO("[%lld] Calling %s", count, backend_dispatch_command_name((ApirBackendCommandType) cmd_type)); - count += 1; -#endif backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type]; uint32_t ret = forward_fct(enc, dec, ctx); diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h index 4f690f6b6ae..7bc5afd2e98 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -10,8 +10,10 @@ #define 
APIR_BACKEND_INITIALIZE_CANNOT_OPEN_GGML_LIBRARY 2 #define APIR_BACKEND_INITIALIZE_MISSING_BACKEND_SYMBOLS 3 #define APIR_BACKEND_INITIALIZE_MISSING_GGML_SYMBOLS 4 +#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5 +#define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6 +#define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7 -#define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5 // new entries here need to be added to the apir_backend_initialize_error function below #define APIR_BACKEND_FORWARD_INDEX_INVALID 6 diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-remotingbackend/shared/apir_cs.h index edb96d22777..bb251a4e278 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs.h @@ -3,9 +3,6 @@ #include #include -// needs UNUSED to be defined -// needs FATAL to be defined - #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -13,11 +10,14 @@ struct apir_encoder { char * cur; const char * start; const char * end; + bool fatal; + }; struct apir_decoder { const char * cur; const char * end; + bool fatal; }; /* @@ -28,6 +28,7 @@ static struct apir_decoder apir_new_decoder(const char * ptr, size_t size) { struct apir_decoder dec = { .cur = ptr, .end = ptr + size, + .fatal = false, }; return dec; @@ -38,24 +39,53 @@ static struct apir_encoder apir_new_encoder(char * ptr, size_t size) { .cur = ptr, .start = ptr, .end = ptr + size, + .fatal = false, }; return enc; } +/* + * fatal flag handling + */ + +static inline void apir_encoder_reset_fatal(struct apir_encoder * enc) { + enc->fatal = false; +} + +static inline void apir_encoder_set_fatal(struct apir_encoder * enc) { + enc->fatal = true; +} + +static inline bool apir_encoder_get_fatal(const struct apir_encoder * enc) { + return enc->fatal; +} + +static inline void apir_decoder_reset_fatal(struct apir_decoder * dec) { + dec->fatal = false; +} + +static inline void apir_decoder_set_fatal(struct apir_decoder * dec) 
{ + dec->fatal = true; +} + +static inline bool apir_decoder_get_fatal(const struct apir_decoder * dec) { + return dec->fatal; +} + /* * encode peek */ -static inline bool apir_decoder_peek_internal(const struct apir_decoder * dec, - size_t size, - void * val, - size_t val_size) { +static inline bool apir_decoder_peek_internal(struct apir_decoder * dec, + size_t size, + void * val, + size_t val_size) { assert(val_size <= size); if (unlikely(size > (size_t) (dec->end - dec->cur))) { - FATAL("READING TOO MUCH FROM THE DECODER :/"); - //apir_decoder_set_fatal(dec); + ERROR("reading too much from the decoder ..."); + apir_decoder_set_fatal(dec); memset(val, 0, val_size); return false; } @@ -65,13 +95,14 @@ static inline bool apir_decoder_peek_internal(const struct apir_decoder * dec, return true; } -static inline void apir_decoder_peek(const struct apir_decoder * dec, size_t size, void * val, size_t val_size) { +static inline void apir_decoder_peek(struct apir_decoder * dec, size_t size, void * val, size_t val_size) { apir_decoder_peek_internal(dec, size, val, val_size); } static inline const void * apir_decoder_use_inplace(struct apir_decoder * dec, size_t size) { if (unlikely(size > (size_t) (dec->end - dec->cur))) { - FATAL("READING TOO MUCH FROM THE DECODER :/"); + ERROR("reading too much from the decoder ..."); + apir_decoder_set_fatal(dec); return NULL; } const void * addr = dec->cur; @@ -188,7 +219,8 @@ static inline uint64_t apir_decode_array_size(struct apir_decoder * dec, uint64_ uint64_t size; apir_decode_uint64_t(dec, &size); if (size != expected_size) { - FATAL("ENCODER IS FULL :/"); + ERROR("Couldn't decode array from the decoder"); + apir_decoder_set_fatal(dec); size = 0; } return size; @@ -288,19 +320,17 @@ static inline void apir_decode_char_array(struct apir_decoder * dec, char * val, if (size) { val[size - 1] = '\0'; } else { - //apir_decoder_set_fatal(dec); - FATAL("Couldn't decode the blog array"); + ERROR("Couldn't decode the blog array"); + 
apir_decoder_set_fatal(dec); } } /* (temp) buffer allocation */ -static inline void * apir_decoder_alloc_array(struct apir_decoder * dec, size_t size, size_t count) { - UNUSED(dec); - +static inline void * apir_decoder_alloc_array(size_t size, size_t count) { size_t alloc_size; if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - FATAL("overflow in array allocation of %zu * %zu bytes", size, count); + ERROR("overflow in array allocation of %zu * %zu bytes", size, count); return NULL; } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h index 497b91bb516..3b5ce3df113 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -155,7 +155,8 @@ static inline void apir_encode_ggml_tensor_inline(struct apir_encoder * enc, con size_t tensor_size = sizeof(*tensor); if (tensor->extra) { - FATAL("Cannot pass tensors with extra"); + ERROR("Cannot pass tensors with extra"); + apir_encoder_set_fatal(enc); } if (tensor->src[0] && tensor->buffer) { diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp index 2d5b2a1f592..c8195e3ef75 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -12,9 +12,10 @@ const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - char * string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - FATAL("%s: Could not allocate the device name buffer", __func__); + ERROR("%s: Could not allocate the device name buffer", __func__); + apir_decoder_set_fatal(decoder); } 
apir_decode_char_array(decoder, string, string_size); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp index f501657f851..e01893ce28a 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -34,9 +34,10 @@ const char * apir_device_get_name(struct virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - FATAL("%s: Could not allocate the device name buffer", __func__); + ERROR("%s: Could not allocate the device name buffer", __func__); + apir_decoder_set_fatal(decoder); } apir_decode_char_array(decoder, string, string_size); @@ -55,9 +56,11 @@ const char * apir_device_get_description(struct virtgpu * gpu) { REMOTE_CALL(gpu, encoder, decoder, ret); const size_t string_size = apir_decode_array_size_unchecked(decoder); - char * string = (char *) apir_decoder_alloc_array(decoder, sizeof(char), string_size); + char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - FATAL("%s: Could not allocate the device description buffer", __func__); + ERROR("%s: Could not allocate the device description buffer", __func__); + apir_decoder_set_fatal(decoder); + return NULL; } apir_decode_char_array(decoder, string, string_size); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index f60b00a582d..f43cd6fdd5f 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -340,9 +340,10 @@ struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType static struct apir_encoder enc; enc = { - encoder_buffer, - encoder_buffer, - 
encoder_buffer + sizeof(encoder_buffer), + .cur = encoder_buffer, + .start = encoder_buffer, + .end = encoder_buffer + sizeof(encoder_buffer), + .fatal = false, }; /* @@ -378,7 +379,13 @@ void remote_call_finish(struct virtgpu * gpu, struct apir_encoder * enc, struct ERROR("Invalid (null) decoder :/"); } - // encoder and decoder are statically allocated, nothing to do to release them + if (apir_encoder_get_fatal(enc)) { + ERROR("Failed to encode the output parameters."); + } + + if (apir_decoder_get_fatal(dec)) { + ERROR("Failed to decode the input parameters."); + } } uint32_t remote_call(struct virtgpu * gpu, From cf88b8fe972b28603cf90d13ed5cbe3874be41bd Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 11:51:59 +0100 Subject: [PATCH 09/37] remove structs --- .../apir_cs_ggml-rpc-back.cpp | 14 +- .../backend-dispatched-backend.cpp | 17 +- .../backend-dispatched-buffer-type.cpp | 36 ++-- .../backend-dispatched-buffer.cpp | 48 ++--- .../backend-dispatched-device.cpp | 62 +++--- .../backend-dispatched.gen.h | 122 ++++++------ .../ggml-remotingbackend/backend-dispatched.h | 6 +- .../ggml-remotingbackend/backend-internal.h | 14 +- ggml/src/ggml-remotingbackend/backend.cpp | 27 ++- .../shared/api_remoting.h | 12 +- .../shared/apir_backend.h | 55 +----- .../src/ggml-remotingbackend/shared/apir_cs.h | 118 ++++++------ .../shared/apir_cs_ggml.h | 62 +++--- .../ggml-remotingbackend/shared/apir_cs_rpc.h | 6 +- .../apir_cs_ggml-rpc-front.cpp | 2 +- .../ggml-backend-buffer-type.cpp | 18 +- .../ggml-backend-buffer.cpp | 29 ++- .../ggml-backend-device.cpp | 32 ++-- .../ggml-backend-reg.cpp | 43 ++--- .../ggml-remotingfrontend/ggml-backend.cpp | 20 +- .../src/ggml-remotingfrontend/ggml-remoting.h | 20 +- ggml/src/ggml-remotingfrontend/virtgpu-apir.h | 4 +- .../virtgpu-forward-backend.cpp | 16 +- .../virtgpu-forward-buffer-type.cpp | 38 ++-- .../virtgpu-forward-buffer.cpp | 50 ++--- .../virtgpu-forward-device.cpp | 71 ++++--- .../virtgpu-forward-impl.h | 34 ++-- 
.../virtgpu-forward.gen.h | 46 ++--- .../src/ggml-remotingfrontend/virtgpu-shm.cpp | 26 +-- ggml/src/ggml-remotingfrontend/virtgpu-shm.h | 4 +- .../ggml-remotingfrontend/virtgpu-utils.cpp | 8 +- .../src/ggml-remotingfrontend/virtgpu-utils.h | 94 +++------- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 177 +++++++++--------- ggml/src/ggml-remotingfrontend/virtgpu.h | 34 ++-- 34 files changed, 607 insertions(+), 758 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp index 7d1088f2526..ddc0b7cd445 100644 --- a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp +++ b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp @@ -27,7 +27,7 @@ std::unordered_set apir_get_track_backend_buffers() { return backend_buffers; } -ggml_tensor * apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor) { +ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor) { ggml_tensor * result = ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) { @@ -64,9 +64,9 @@ ggml_tensor * apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_ } ggml_tensor * apir_create_node(uint64_t id, - struct ggml_context * ctx, + ggml_context * ctx, const std::unordered_map & tensor_ptrs, - std::unordered_map & tensor_map) { + std::unordered_map & tensor_map) { if (id == 0) { return nullptr; } @@ -74,7 +74,7 @@ ggml_tensor * apir_create_node(uint64_t return tensor_map[id]; } const apir_rpc_tensor * tensor = tensor_ptrs.at(id); - struct ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); + ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); if (result == nullptr) { return nullptr; } @@ -92,13 +92,13 @@ ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes, const apir_rpc_tensor * tensors, const uint64_t * nodes) { size_t buf_size = 
ggml_tensor_overhead() * (n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false); - struct ggml_init_params params = { + ggml_init_params params = { /*.mem_size =*/buf_size, /*.mem_buffer =*/NULL, /*.no_alloc =*/true, }; - struct ggml_context * ctx = ggml_init(params); - struct ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); + ggml_context * ctx = ggml_init(params); + ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); graph->n_nodes = n_nodes; std::unordered_map tensor_ptrs; for (uint32_t i = 0; i < n_tensors; i++) { diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index 8b1d698a719..229ffa7f9c7 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -7,11 +7,9 @@ #include -struct timer_data graph_compute_timer = { 0, 0, 0, "compute_timer" }; - -uint32_t backend_backend_graph_compute(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_backend_graph_compute(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); @@ -19,15 +17,13 @@ uint32_t backend_backend_graph_compute(struct apir_encoder * enc, static bool async_backend; if (!async_backend_initialized) { - struct ggml_backend_dev_props props; + ggml_backend_dev_props props; dev->iface.get_props(dev, &props); async_backend = props.caps.async; async_backend_initialized = true; } - start_timer(&graph_compute_timer); - uint32_t shmem_res_id; apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); @@ -40,7 +36,7 @@ uint32_t backend_backend_graph_compute(struct apir_encoder * enc, size_t cgraph_size; apir_decode_size_t(dec, &cgraph_size); - struct apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, cgraph_size); + apir_decoder secondary_dec = apir_new_decoder((const char *) shmem_data, 
cgraph_size); ggml_cgraph * cgraph = apir_decode_ggml_cgraph(&secondary_dec, cgraph_size); @@ -56,7 +52,6 @@ uint32_t backend_backend_graph_compute(struct apir_encoder * enc, status = GGML_STATUS_ABORTED; apir_encode_ggml_status(enc, &status); - stop_timer(&graph_compute_timer); return 0; } #endif @@ -68,7 +63,5 @@ uint32_t backend_backend_graph_compute(struct apir_encoder * enc, apir_encode_ggml_status(enc, &status); - stop_timer(&graph_compute_timer); - return 0; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp index e05f4f87f50..ad908d243a3 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp @@ -6,9 +6,9 @@ #include -uint32_t backend_buffer_type_get_name(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_name(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -22,9 +22,9 @@ uint32_t backend_buffer_type_get_name(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_alignment(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -35,9 +35,9 @@ uint32_t backend_buffer_type_get_alignment(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_max_size(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = 
apir_decode_ggml_buffer_type(dec); @@ -48,9 +48,9 @@ uint32_t backend_buffer_type_get_max_size(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_is_host(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_is_host(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -61,9 +61,9 @@ uint32_t backend_buffer_type_is_host(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; @@ -85,9 +85,9 @@ uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index 40605180a93..03c2299ed60 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -6,13 +6,9 @@ #include -struct timer_data get_tensor_timer = { 0, 0, 0, "get_tensor" }; -struct timer_data set_tensor_timer = { 0, 0, 0, "set_tensor" }; -struct timer_data cpy_tensor_timer = { 0, 0, 0, "set_tensor" }; - -uint32_t backend_buffer_get_base(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_get_base(apir_encoder * 
enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -23,14 +19,12 @@ uint32_t backend_buffer_get_base(struct apir_encoder * enc, return 0; } -uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_set_tensor(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); - start_timer(&set_tensor_timer); - ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -56,19 +50,15 @@ uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, buffer->iface.set_tensor(buffer, tensor, shmem_data, offset, size); - stop_timer(&set_tensor_timer); - return 0; } -uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_get_tensor(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); - start_timer(&get_tensor_timer); - ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -93,18 +83,14 @@ uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, buffer->iface.get_tensor(buffer, tensor, shmem_data, offset, size); - stop_timer(&get_tensor_timer); - return 0; } -uint32_t backend_buffer_cpy_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); - start_timer(&cpy_tensor_timer); - ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); INFO("%s <---->", __func__); @@ -117,12 +103,10 @@ uint32_t backend_buffer_cpy_tensor(struct apir_encoder * enc, apir_encode_bool_t(enc, &ret); - stop_timer(&cpy_tensor_timer); - return 0; } -uint32_t backend_buffer_clear(struct apir_encoder * enc, struct apir_decoder * dec, struct 
virgl_apir_context * ctx) { +uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); @@ -137,9 +121,9 @@ uint32_t backend_buffer_clear(struct apir_encoder * enc, struct apir_decoder * d return 0; } -uint32_t backend_buffer_free_buffer(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_buffer_free_buffer(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index bdeee123e81..26f0cb88f40 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -6,9 +6,9 @@ #include -uint32_t backend_device_get_device_count(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_device_count(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -19,9 +19,9 @@ uint32_t backend_device_get_device_count(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_count(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_count(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -32,9 +32,9 @@ uint32_t backend_device_get_count(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_name(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_name(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -47,9 +47,9 @@ uint32_t backend_device_get_name(struct apir_encoder * enc, return 0; } -uint32_t 
backend_device_get_description(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_description(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -62,9 +62,9 @@ uint32_t backend_device_get_description(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_type(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_type(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -74,9 +74,9 @@ uint32_t backend_device_get_type(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_memory(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_memory(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -89,9 +89,9 @@ uint32_t backend_device_get_memory(struct apir_encoder * enc, return 0; } -uint32_t backend_device_supports_op(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_supports_op(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); @@ -103,9 +103,9 @@ uint32_t backend_device_supports_op(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_buffer_type(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_get_buffer_type(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -116,13 +116,13 @@ uint32_t backend_device_get_buffer_type(struct apir_encoder * enc, return 0; } -uint32_t backend_device_get_props(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t 
backend_device_get_props(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); - struct ggml_backend_dev_props props; + ggml_backend_dev_props props; dev->iface.get_props(dev, &props); apir_encode_bool_t(enc, &props.caps.async); @@ -133,9 +133,9 @@ uint32_t backend_device_get_props(struct apir_encoder * enc, return 0; } -uint32_t backend_device_buffer_from_ptr(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx) { +uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h index da82846db2a..9336c29ad7c 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h @@ -1,73 +1,73 @@ #pragma once /* device */ -uint32_t backend_device_get_device_count(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_get_count(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_get_name(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); -uint32_t backend_device_get_description(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_get_type(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); -uint32_t backend_device_get_memory(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_supports_op(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_get_buffer_type(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); 
-uint32_t backend_device_get_props(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_device_buffer_from_ptr(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); +uint32_t backend_device_get_device_count(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_get_count(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_get_description(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_get_memory(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_supports_op(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_get_buffer_type(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_get_props(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); /* buffer-type */ -uint32_t backend_buffer_type_get_name(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_alignment(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_max_size(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_type_is_host(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_type_alloc_buffer(struct apir_encoder * enc, - struct apir_decoder * dec, - struct 
virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_alloc_size(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_name(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_type_is_host(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); /* buffer */ -uint32_t backend_buffer_get_base(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); -uint32_t backend_buffer_set_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_get_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_cpy_tensor(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); -uint32_t backend_buffer_clear(struct apir_encoder * enc, struct apir_decoder * dec, struct virgl_apir_context * ctx); -uint32_t backend_buffer_free_buffer(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); +uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_set_tensor(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_get_tensor(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, + apir_decoder * 
dec, + virgl_apir_context * ctx); +uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_free_buffer(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); /* backend */ -uint32_t backend_backend_graph_compute(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); +uint32_t backend_backend_graph_compute(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) { switch (type) { diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index e4d054dad15..2268ed8966a 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -12,9 +12,9 @@ #include "shared/apir_cs_ggml.h" -typedef uint32_t (*backend_dispatch_t)(struct apir_encoder * enc, - struct apir_decoder * dec, - struct virgl_apir_context * ctx); +typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, + apir_decoder * dec, + virgl_apir_context * ctx); #include "backend-dispatched.gen.h" diff --git a/ggml/src/ggml-remotingbackend/backend-internal.h b/ggml/src/ggml-remotingbackend/backend-internal.h index 939d7d588e1..56ce9ea3a86 100644 --- a/ggml/src/ggml-remotingbackend/backend-internal.h +++ b/ggml/src/ggml-remotingbackend/backend-internal.h @@ -14,11 +14,11 @@ extern ggml_backend_t bck; extern "C" { ApirLoadLibraryReturnCode apir_backend_initialize(); void apir_backend_deinit(void); -uint32_t apir_backend_dispatcher(uint32_t cmd_type, - struct virgl_apir_context * ctx, - char * dec_cur, - const char * dec_end, - char * enc_cur, - const char * enc_end, - char ** enc_cur_after); +uint32_t apir_backend_dispatcher(uint32_t cmd_type, + virgl_apir_context * ctx, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after); } 
diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index 91d8859e923..e8db1d07ad3 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -30,11 +30,6 @@ void apir_backend_deinit(void) { INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); } - show_timer(&graph_compute_timer); - show_timer(&set_tensor_timer); - show_timer(&get_tensor_timer); - /* *** */ - if (backend_library_handle) { INFO("%s: The GGML backend library was loaded. Unloading it.", __func__); dlclose(backend_library_handle); @@ -101,27 +96,27 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret); } -uint32_t apir_backend_dispatcher(uint32_t cmd_type, - struct virgl_apir_context * ctx, - char * dec_cur, - const char * dec_end, - char * enc_cur, - const char * enc_end, - char ** enc_cur_after) { - struct apir_encoder _enc = { +uint32_t apir_backend_dispatcher(uint32_t cmd_type, + virgl_apir_context * ctx, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after) { + apir_encoder _enc = { .cur = enc_cur, .start = enc_cur, .end = enc_end, .fatal = false, }; - struct apir_encoder * enc = &_enc; + apir_encoder * enc = &_enc; - struct apir_decoder _dec = { + apir_decoder _dec = { .cur = dec_cur, .end = dec_end, .fatal = false, }; - struct apir_decoder * dec = &_dec; + apir_decoder * dec = &_dec; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h b/ggml/src/ggml-remotingbackend/shared/api_remoting.h index b3b831ffd5f..4025586cc3a 100644 --- a/ggml/src/ggml-remotingbackend/shared/api_remoting.h +++ b/ggml/src/ggml-remotingbackend/shared/api_remoting.h @@ -11,17 +11,17 @@ #define APIR_HANDSHAKE_MAGIC 
0xab1e -typedef enum { +enum ApirCommandType { APIR_COMMAND_TYPE_HandShake = 0, APIR_COMMAND_TYPE_LoadLibrary = 1, APIR_COMMAND_TYPE_Forward = 2, APIR_COMMAND_TYPE_LENGTH = 3, -} ApirCommandType; +}; typedef uint64_t ApirCommandFlags; -typedef enum { +enum ApirLoadLibraryReturnCode { APIR_LOAD_LIBRARY_SUCCESS = 0, APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR = 1, APIR_LOAD_LIBRARY_ALREADY_LOADED = 2, @@ -29,15 +29,15 @@ typedef enum { APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code -} ApirLoadLibraryReturnCode; +}; -typedef enum { +enum ApirForwardReturnCode { APIR_FORWARD_SUCCESS = 0, APIR_FORWARD_NO_DISPATCH_FCT = 1, APIR_FORWARD_TIMEOUT = 2, APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code -} ApirForwardReturnCode; +} ; __attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) { switch (type) { diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h index 7bc5afd2e98..094f004c630 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -3,7 +3,7 @@ #include "apir_backend.gen.h" #include // for uintptr_t -#include // for struct timespec, clock_gettime +#include // for timespec, clock_gettime #define APIR_BACKEND_INITIALIZE_SUCCESS 0 #define APIR_BACKEND_INITIALIZE_CANNOT_OPEN_BACKEND_LIBRARY 1 @@ -27,62 +27,15 @@ typedef uintptr_t apir_buffer_host_handle_t; struct virgl_opaque_context; struct virgl_apir_callbacks { - void * (*get_shmem_ptr)(struct virgl_opaque_context * ctx, uint32_t res_id); + void * (*get_shmem_ptr)(virgl_opaque_context * ctx, uint32_t res_id); }; struct virgl_apir_context { - struct virgl_opaque_context * virgl_ctx; + virgl_opaque_context * virgl_ctx; - struct virgl_apir_callbacks iface; + 
virgl_apir_callbacks iface; }; -struct timer_data { - long long start; - long long total; - long long count; - const char * name; -}; - -extern struct timer_data graph_compute_timer; -extern struct timer_data get_tensor_timer; -extern struct timer_data set_tensor_timer; -extern struct timer_data cpy_tensor_timer; -extern struct timer_data wait_host_reply_timer; -extern struct timer_data get_tensor_from_ptr_timer; -extern struct timer_data set_tensor_from_ptr_timer; - -static inline void start_timer(struct timer_data * timer) { - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; -} - -// returns the duration in ns -static inline long long stop_timer(struct timer_data * timer) { - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; - - long long duration = (timer_end - timer->start); - timer->total += duration; - timer->count += 1; - - return duration; -} - -static inline void show_timer(struct timer_data * timer) { - double ms = timer->total / 1000000; - double itl = ms / timer->count; - double speed = 1 / itl * 1000; - - if (!timer->total) { - return; - } - - INFO("%15s [%9.0f] ms for %4ld invocations | ITL %2.2f ms | throughput = %4.2f t/s (%4.2f ms/call)", timer->name, - ms, timer->count, itl, speed, ms / timer->count); -} - static const char * apir_backend_initialize_error(int code) { #define APIR_BACKEND_INITIALIZE_ERROR(code_name) \ do { \ diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-remotingbackend/shared/apir_cs.h index bb251a4e278..c8e8f96f86c 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs.h @@ -1,5 +1,7 @@ #pragma once +#include "ggml-impl.h" + #include #include @@ -24,8 +26,8 @@ struct apir_decoder { * new encoder and decoder */ -static struct apir_decoder apir_new_decoder(const char * ptr, size_t size) { 
- struct apir_decoder dec = { +static apir_decoder apir_new_decoder(const char * ptr, size_t size) { + apir_decoder dec = { .cur = ptr, .end = ptr + size, .fatal = false, @@ -34,8 +36,8 @@ static struct apir_decoder apir_new_decoder(const char * ptr, size_t size) { return dec; } -static struct apir_encoder apir_new_encoder(char * ptr, size_t size) { - struct apir_encoder enc = { +static apir_encoder apir_new_encoder(char * ptr, size_t size) { + apir_encoder enc = { .cur = ptr, .start = ptr, .end = ptr + size, @@ -49,27 +51,27 @@ static struct apir_encoder apir_new_encoder(char * ptr, size_t size) { * fatal flag handling */ -static inline void apir_encoder_reset_fatal(struct apir_encoder * enc) { +static inline void apir_encoder_reset_fatal(apir_encoder * enc) { enc->fatal = false; } -static inline void apir_encoder_set_fatal(struct apir_encoder * enc) { +static inline void apir_encoder_set_fatal(apir_encoder * enc) { enc->fatal = true; } -static inline bool apir_encoder_get_fatal(const struct apir_encoder * enc) { +static inline bool apir_encoder_get_fatal(const apir_encoder * enc) { return enc->fatal; } -static inline void apir_decoder_reset_fatal(struct apir_decoder * dec) { +static inline void apir_decoder_reset_fatal(apir_decoder * dec) { dec->fatal = false; } -static inline void apir_decoder_set_fatal(struct apir_decoder * dec) { +static inline void apir_decoder_set_fatal(apir_decoder * dec) { dec->fatal = true; } -static inline bool apir_decoder_get_fatal(const struct apir_decoder * dec) { +static inline bool apir_decoder_get_fatal(const apir_decoder * dec) { return dec->fatal; } @@ -77,14 +79,14 @@ static inline bool apir_decoder_get_fatal(const struct apir_decoder * dec) { * encode peek */ -static inline bool apir_decoder_peek_internal(struct apir_decoder * dec, +static inline bool apir_decoder_peek_internal(apir_decoder * dec, size_t size, void * val, size_t val_size) { assert(val_size <= size); if (unlikely(size > (size_t) (dec->end - dec->cur))) { - 
ERROR("reading too much from the decoder ..."); + GGML_LOG_ERROR("reading too much from the decoder ..."); apir_decoder_set_fatal(dec); memset(val, 0, val_size); return false; @@ -95,13 +97,13 @@ static inline bool apir_decoder_peek_internal(struct apir_decoder * dec, return true; } -static inline void apir_decoder_peek(struct apir_decoder * dec, size_t size, void * val, size_t val_size) { +static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val, size_t val_size) { apir_decoder_peek_internal(dec, size, val, val_size); } -static inline const void * apir_decoder_use_inplace(struct apir_decoder * dec, size_t size) { +static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) { if (unlikely(size > (size_t) (dec->end - dec->cur))) { - ERROR("reading too much from the decoder ..."); + GGML_LOG_ERROR("reading too much from the decoder ..."); apir_decoder_set_fatal(dec); return NULL; } @@ -115,13 +117,13 @@ static inline const void * apir_decoder_use_inplace(struct apir_decoder * dec, s * read/write */ -static inline void apir_decoder_read(struct apir_decoder * dec, size_t size, void * val, size_t val_size) { +static inline void apir_decoder_read(apir_decoder * dec, size_t size, void * val, size_t val_size) { if (apir_decoder_peek_internal(dec, size, val, val_size)) { dec->cur += size; } } -static inline char * apir_encoder_write(struct apir_encoder * enc, size_t size, const void * val, size_t val_size) { +static inline char * apir_encoder_write(apir_encoder * enc, size_t size, const void * val, size_t val_size) { assert(val_size <= size); assert(size <= ((size_t) (enc->end - enc->cur))); @@ -137,12 +139,12 @@ static inline char * apir_encoder_write(struct apir_encoder * enc, size_t size, * encode/decode */ -static inline void apir_decode(struct apir_decoder * dec, size_t size, void * data, size_t data_size) { +static inline void apir_decode(apir_decoder * dec, size_t size, void * data, size_t data_size) { assert(size % 4 
== 0); apir_decoder_read(dec, size, data, data_size); } -static inline void apir_encode(struct apir_encoder * enc, size_t size, const void * data, size_t data_size) { +static inline void apir_encode(apir_encoder * enc, size_t size, const void * data, size_t data_size) { assert(size % 4 == 0); apir_encoder_write(enc, size, data, data_size); } @@ -153,57 +155,57 @@ static inline void apir_encode(struct apir_encoder * enc, size_t size, const voi /* uint8_t */ -static inline void apir_encode_uint8_t(struct apir_encoder * enc, const uint8_t * val) { +static inline void apir_encode_uint8_t(apir_encoder * enc, const uint8_t * val) { apir_encode(enc, sizeof(int), val, sizeof(*val)); } -static inline void apir_decode_uint8_t(struct apir_decoder * dec, uint8_t * val) { +static inline void apir_decode_uint8_t(apir_decoder * dec, uint8_t * val) { apir_decode(dec, sizeof(int), val, sizeof(*val)); } /* uint64_t */ -static inline void apir_encode_uint64_t(struct apir_encoder * enc, const uint64_t * val) { +static inline void apir_encode_uint64_t(apir_encoder * enc, const uint64_t * val) { apir_encode(enc, 8, val, sizeof(*val)); } -static inline void apir_decode_uint64_t(struct apir_decoder * dec, uint64_t * val) { +static inline void apir_decode_uint64_t(apir_decoder * dec, uint64_t * val) { apir_decode(dec, 8, val, sizeof(*val)); } -static inline void apir_encode_uint64_t_array(struct apir_encoder * enc, const uint64_t * val, uint32_t count) { +static inline void apir_encode_uint64_t_array(apir_encoder * enc, const uint64_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_encode(enc, size, val, size); } -static inline void apir_decode_uint64_t_array(struct apir_decoder * dec, uint64_t * val, uint32_t count) { +static inline void apir_decode_uint64_t_array(apir_decoder * dec, uint64_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_decode(dec, size, val, size); } -static inline const 
uint64_t * apir_decode_uint64_t_array_inplace(struct apir_decoder * dec, uint32_t count) { +static inline const uint64_t * apir_decode_uint64_t_array_inplace(apir_decoder * dec, uint32_t count) { return (uint64_t *) (uintptr_t) apir_decoder_use_inplace(dec, count * sizeof(uint64_t)); } /* int32_t */ -static inline void apir_encode_int32_t(struct apir_encoder * enc, const int32_t * val) { +static inline void apir_encode_int32_t(apir_encoder * enc, const int32_t * val) { apir_encode(enc, 4, val, sizeof(*val)); } -static inline void apir_decode_int32_t(struct apir_decoder * dec, int32_t * val) { +static inline void apir_decode_int32_t(apir_decoder * dec, int32_t * val) { apir_decode(dec, 4, val, sizeof(*val)); } -static inline void apir_encode_int32_t_array(struct apir_encoder * enc, const int32_t * val, uint32_t count) { +static inline void apir_encode_int32_t_array(apir_encoder * enc, const int32_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_encode(enc, size, val, size); } -static inline void apir_decode_int32_t_array(struct apir_decoder * dec, int32_t * val, uint32_t count) { +static inline void apir_decode_int32_t_array(apir_decoder * dec, int32_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_decode(dec, size, val, size); @@ -211,22 +213,22 @@ static inline void apir_decode_int32_t_array(struct apir_decoder * dec, int32_t /* array size (uint64_t) */ -static inline void apir_encode_array_size(struct apir_encoder * enc, uint64_t size) { +static inline void apir_encode_array_size(apir_encoder * enc, uint64_t size) { apir_encode_uint64_t(enc, &size); } -static inline uint64_t apir_decode_array_size(struct apir_decoder * dec, uint64_t expected_size) { +static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expected_size) { uint64_t size; apir_decode_uint64_t(dec, &size); if (size != expected_size) { - ERROR("Couldn't decode array from the decoder"); + 
GGML_LOG_ERROR("Couldn't decode array from the decoder"); apir_decoder_set_fatal(dec); size = 0; } return size; } -static inline uint64_t apir_decode_array_size_unchecked(struct apir_decoder * dec) { +static inline uint64_t apir_decode_array_size_unchecked(apir_decoder * dec) { uint64_t size; apir_decode_uint64_t(dec, &size); return size; @@ -234,32 +236,32 @@ static inline uint64_t apir_decode_array_size_unchecked(struct apir_decoder * de /* non-array pointer */ -static inline bool apir_encode_simple_pointer(struct apir_encoder * enc, const void * val) { +static inline bool apir_encode_simple_pointer(apir_encoder * enc, const void * val) { apir_encode_array_size(enc, val ? 1 : 0); return val; } -static inline bool apir_decode_simple_pointer(struct apir_decoder * dec) { +static inline bool apir_decode_simple_pointer(apir_decoder * dec) { return apir_decode_array_size_unchecked(dec); } /* uint32_t */ -static inline void apir_encode_uint32_t(struct apir_encoder * enc, const uint32_t * val) { +static inline void apir_encode_uint32_t(apir_encoder * enc, const uint32_t * val) { apir_encode(enc, 4, val, sizeof(*val)); } -static inline void apir_decode_uint32_t(struct apir_decoder * dec, uint32_t * val) { +static inline void apir_decode_uint32_t(apir_decoder * dec, uint32_t * val) { apir_decode(dec, 4, val, sizeof(*val)); } -static inline void apir_encode_uint32_t_array(struct apir_encoder * enc, const uint32_t * val, uint32_t count) { +static inline void apir_encode_uint32_t_array(apir_encoder * enc, const uint32_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_encode(enc, size, val, size); } -static inline void apir_decode_uint32_t_array(struct apir_decoder * dec, uint32_t * val, uint32_t count) { +static inline void apir_decode_uint32_t_array(apir_decoder * dec, uint32_t * val, uint32_t count) { const size_t size = sizeof(*val) * count; assert(size >= count); apir_decode(dec, size, val, size); @@ -267,18 +269,18 @@ static 
inline void apir_decode_uint32_t_array(struct apir_decoder * dec, uint32_ /* size_t */ -static inline void apir_encode_size_t(struct apir_encoder * enc, const size_t * val) { +static inline void apir_encode_size_t(apir_encoder * enc, const size_t * val) { const uint64_t tmp = *val; apir_encode_uint64_t(enc, &tmp); } -static inline void apir_decode_size_t(struct apir_decoder * dec, size_t * val) { +static inline void apir_decode_size_t(apir_decoder * dec, size_t * val) { uint64_t tmp; apir_decode_uint64_t(dec, &tmp); *val = tmp; } -static inline void apir_encode_size_t_array(struct apir_encoder * enc, const size_t * val, uint32_t count) { +static inline void apir_encode_size_t_array(apir_encoder * enc, const size_t * val, uint32_t count) { if (sizeof(size_t) == sizeof(uint64_t)) { apir_encode_uint64_t_array(enc, (const uint64_t *) val, count); } else { @@ -288,7 +290,7 @@ static inline void apir_encode_size_t_array(struct apir_encoder * enc, const siz } } -static inline void apir_decode_size_t_array(struct apir_decoder * dec, size_t * val, uint32_t count) { +static inline void apir_decode_size_t_array(apir_decoder * dec, size_t * val, uint32_t count) { if (sizeof(size_t) == sizeof(uint64_t)) { apir_decode_uint64_t_array(dec, (uint64_t *) val, count); } else { @@ -300,27 +302,27 @@ static inline void apir_decode_size_t_array(struct apir_decoder * dec, size_t * /* opaque blob */ -static inline void apir_encode_blob_array(struct apir_encoder * enc, const void * val, size_t size) { +static inline void apir_encode_blob_array(apir_encoder * enc, const void * val, size_t size) { apir_encode(enc, (size + 3) & ~3, val, size); } -static inline void apir_decode_blob_array(struct apir_decoder * dec, void * val, size_t size) { +static inline void apir_decode_blob_array(apir_decoder * dec, void * val, size_t size) { apir_decode(dec, (size + 3) & ~3, val, size); } /* string */ -static inline void apir_encode_char_array(struct apir_encoder * enc, const char * val, size_t size) { 
+static inline void apir_encode_char_array(apir_encoder * enc, const char * val, size_t size) { assert(size && strlen(val) < size); apir_encode_blob_array(enc, val, size); } -static inline void apir_decode_char_array(struct apir_decoder * dec, char * val, size_t size) { +static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t size) { apir_decode_blob_array(dec, val, size); if (size) { val[size - 1] = '\0'; } else { - ERROR("Couldn't decode the blog array"); + GGML_LOG_ERROR("Couldn't decode the blog array"); apir_decoder_set_fatal(dec); } } @@ -330,7 +332,7 @@ static inline void apir_decode_char_array(struct apir_decoder * dec, char * val, static inline void * apir_decoder_alloc_array(size_t size, size_t count) { size_t alloc_size; if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - ERROR("overflow in array allocation of %zu * %zu bytes", size, count); + GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes", size, count); return NULL; } @@ -339,43 +341,43 @@ static inline void * apir_decoder_alloc_array(size_t size, size_t count) { /* bool */ -static inline void apir_encode_bool_t(struct apir_encoder * enc, const bool * val) { +static inline void apir_encode_bool_t(apir_encoder * enc, const bool * val) { apir_encode(enc, sizeof(int), val, sizeof(bool)); } -static inline void apir_decode_bool_t(struct apir_decoder * dec, bool * val) { +static inline void apir_decode_bool_t(apir_decoder * dec, bool * val) { apir_decode(dec, sizeof(int), val, sizeof(bool)); } /* apir_buffer_type_host_handle_t */ -static inline void apir_encode_apir_buffer_type_host_handle_t(struct apir_encoder * enc, +static inline void apir_encode_apir_buffer_type_host_handle_t(apir_encoder * enc, const apir_buffer_type_host_handle_t * val) { apir_encode(enc, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } -static inline void apir_decode_apir_buffer_type_host_handle_t(struct apir_decoder * dec, +static inline 
void apir_decode_apir_buffer_type_host_handle_t(apir_decoder * dec, apir_buffer_type_host_handle_t * val) { apir_decode(dec, sizeof(apir_buffer_type_host_handle_t), val, sizeof(apir_buffer_type_host_handle_t)); } /* apir_buffer_host_handle_t */ -static inline void apir_encode_apir_buffer_host_handle_t(struct apir_encoder * enc, +static inline void apir_encode_apir_buffer_host_handle_t(apir_encoder * enc, const apir_buffer_host_handle_t * val) { apir_encode(enc, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); } -static inline void apir_decode_apir_buffer_host_handle_t(struct apir_decoder * dec, apir_buffer_host_handle_t * val) { +static inline void apir_decode_apir_buffer_host_handle_t(apir_decoder * dec, apir_buffer_host_handle_t * val) { apir_decode(dec, sizeof(apir_buffer_host_handle_t), val, sizeof(apir_buffer_host_handle_t)); } /* uintptr_t */ -static inline void apir_encode_uintptr_t(struct apir_encoder * enc, const uintptr_t * val) { +static inline void apir_encode_uintptr_t(apir_encoder * enc, const uintptr_t * val) { apir_encode(enc, sizeof(*val), val, sizeof(*val)); } -static inline void apir_decode_uintptr_t(struct apir_decoder * dec, uintptr_t * val) { +static inline void apir_decode_uintptr_t(apir_decoder * dec, uintptr_t * val) { apir_decode(dec, sizeof(*val), val, sizeof(*val)); } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h index 3b5ce3df113..2e1b26a01ba 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -1,30 +1,29 @@ -// needs the ggml-backend-impl.h definition - +#include "ggml-impl.h" #include "apir_cs.h" #include "apir_cs_rpc.h" // ggml_buffer_to_apir_host_handle(ggml_backend_buffer_t buffer); -static inline void apir_encode_ggml_buffer_host_handle(struct apir_encoder * enc, +static inline void apir_encode_ggml_buffer_host_handle(apir_encoder * enc, const 
apir_buffer_host_handle_t * handle); -static inline ggml_backend_buffer_t apir_decode_ggml_buffer(struct apir_decoder * dec); +static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec); /* apir_rpc_tensor */ -static inline void apir_encode_rcp_tensor(struct apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) { +static inline void apir_encode_rcp_tensor(apir_encoder * enc, const apir_rpc_tensor * apir_rpc_tensor) { size_t apir_rpc_tensor_size = sizeof(*apir_rpc_tensor); apir_encode(enc, apir_rpc_tensor_size, apir_rpc_tensor, apir_rpc_tensor_size); } -static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(struct apir_decoder * dec) { +static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_inplace(apir_decoder * dec) { size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor); return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); } -static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(struct apir_decoder * dec, - uint32_t n_tensors) { +static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(apir_decoder * dec, + uint32_t n_tensors) { size_t apir_rpc_tensor_size = sizeof(apir_rpc_tensor) * n_tensors; return (apir_rpc_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, apir_rpc_tensor_size); @@ -32,20 +31,20 @@ static inline apir_rpc_tensor * apir_decode_apir_rpc_tensor_array_inplace(struct /* ggml_tensor */ -static inline void apir_encode_ggml_tensor(struct apir_encoder * enc, const ggml_tensor * tensor) { +static inline void apir_encode_ggml_tensor(apir_encoder * enc, const ggml_tensor * tensor) { apir_rpc_tensor serialized = apir_serialize_tensor(tensor); apir_encode_rcp_tensor(enc, &serialized); } -static inline const ggml_tensor * apir_decode_ggml_tensor(struct apir_decoder * dec) { +static inline const ggml_tensor * apir_decode_ggml_tensor(apir_decoder * dec) { const apir_rpc_tensor * apir_rpc_tensor = apir_decode_apir_rpc_tensor_inplace(dec); 
- struct ggml_init_params params{ - /*.mem_size =*/ggml_tensor_overhead(), - /*.mem_buffer =*/NULL, - /*.no_alloc =*/true, + ggml_init_params params{ + /*.mem_size =*/ ggml_tensor_overhead(), + /*.mem_buffer =*/ NULL, + /*.no_alloc =*/ true, }; - struct ggml_context * ctx = ggml_init(params); + ggml_context * ctx = ggml_init(params); const ggml_tensor * tensor = apir_deserialize_tensor(ctx, apir_rpc_tensor); @@ -59,12 +58,12 @@ static inline const ggml_tensor * apir_decode_ggml_tensor(struct apir_decoder * // The guest stores it in `buft->context`. // The host simply writes the pointer address in the buffer variable. -static inline void apir_encode_ggml_buffer_type(struct apir_encoder * enc, ggml_backend_buffer_type_t buft) { +static inline void apir_encode_ggml_buffer_type(apir_encoder * enc, ggml_backend_buffer_type_t buft) { apir_buffer_type_host_handle_t handle = ggml_buffer_type_to_apir_handle(buft); apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); } -static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(struct apir_decoder * dec) { +static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(apir_decoder * dec) { apir_buffer_type_host_handle_t handle; apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); @@ -72,7 +71,7 @@ static inline ggml_backend_buffer_type_t apir_decode_ggml_buffer_type(struct api return (ggml_backend_buffer_type_t) handle; } -static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(struct apir_decoder * dec) { +static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_handle(apir_decoder * dec) { apir_buffer_type_host_handle_t handle; apir_decoder_read(dec, sizeof(handle), &handle, sizeof(handle)); @@ -85,12 +84,12 @@ static inline apir_buffer_type_host_handle_t apir_decode_apir_buffer_type_host_h // ggml_backend_buffer_t is a POINTER. 
// same logic as for ggml_backend_buffer_type_t -static inline void apir_encode_ggml_buffer(struct apir_encoder * enc, const ggml_backend_buffer_t buffer) { +static inline void apir_encode_ggml_buffer(apir_encoder * enc, const ggml_backend_buffer_t buffer) { apir_buffer_host_handle_t handle = BUFFER_TO_HOST_HANDLE(buffer); apir_encoder_write(enc, sizeof(handle), &handle, sizeof(handle)); } -static inline ggml_backend_buffer_t apir_decode_ggml_buffer(struct apir_decoder * dec) { +static inline ggml_backend_buffer_t apir_decode_ggml_buffer(apir_decoder * dec) { ggml_backend_buffer_t buffer; size_t buffer_ptr_size = sizeof(buffer); @@ -101,21 +100,21 @@ static inline ggml_backend_buffer_t apir_decode_ggml_buffer(struct apir_decoder /* enum ggml_status */ -static inline void apir_encode_ggml_status(struct apir_encoder * enc, const enum ggml_status * status) { +static inline void apir_encode_ggml_status(apir_encoder * enc, const ggml_status * status) { apir_encoder_write(enc, sizeof(*status), status, sizeof(*status)); } -static inline void apir_decode_ggml_status(struct apir_decoder * dec, enum ggml_status * status) { +static inline void apir_decode_ggml_status(apir_decoder * dec, ggml_status * status) { apir_decoder_read(dec, sizeof(*status), status, sizeof(*status)); } /* virtgpu_shmem */ -static inline void apir_encode_virtgpu_shmem_res_id(struct apir_encoder * enc, uint32_t shmem_res_id) { +static inline void apir_encode_virtgpu_shmem_res_id(apir_encoder * enc, uint32_t shmem_res_id) { apir_encode_uint32_t(enc, &shmem_res_id); } -static inline void apir_decode_virtgpu_shmem_res_id(struct apir_decoder * dec, uint32_t * shmem_res_id) { +static inline void apir_decode_virtgpu_shmem_res_id(apir_decoder * dec, uint32_t * shmem_res_id) { apir_decode_uint32_t(dec, shmem_res_id); } @@ -127,13 +126,13 @@ static inline size_t apir_serialize_ggml_cgraph(ggml_cgraph * cgraph, std::vecto return cgraph_data.size(); } -static inline void apir_encode_cgraph_data(struct apir_encoder 
* enc, std::vector & cgraph_data) { +static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector & cgraph_data) { size_t cgraph_size = cgraph_data.size(); apir_encode(enc, cgraph_size, cgraph_data.data(), cgraph_size); } -static inline ggml_cgraph * apir_decode_ggml_cgraph(struct apir_decoder * dec, size_t cgraph_size) { +static inline ggml_cgraph * apir_decode_ggml_cgraph(apir_decoder * dec, size_t cgraph_size) { UNUSED(cgraph_size); uint32_t n_nodes; @@ -147,22 +146,21 @@ static inline ggml_cgraph * apir_decode_ggml_cgraph(struct apir_decoder * dec, s return apir_deserialize_graph(n_nodes, n_tensors, tensors, nodes); } -static inline void apir_encode_ggml_buffer_handle(struct apir_encoder * enc, const apir_buffer_host_handle_t * handle) { +static inline void apir_encode_ggml_buffer_handle(apir_encoder * enc, const apir_buffer_host_handle_t * handle) { apir_encoder_write(enc, sizeof(*handle), &handle, sizeof(*handle)); } -static inline void apir_encode_ggml_tensor_inline(struct apir_encoder * enc, const ggml_tensor * tensor) { +static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml_tensor * tensor) { size_t tensor_size = sizeof(*tensor); if (tensor->extra) { - ERROR("Cannot pass tensors with extra"); - apir_encoder_set_fatal(enc); + GGML_ABORT("Cannot pass tensors with extra"); } if (tensor->src[0] && tensor->buffer) { static int first = 1; if (first) { - WARNING("Cannot pass tensors with src and buffer"); + GGML_LOG_WARN("Cannot pass tensors with src and buffer"); first = 0; } } @@ -188,7 +186,7 @@ static inline void apir_encode_ggml_tensor_inline(struct apir_encoder * enc, con } } -static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(struct apir_decoder * dec) { +static inline const ggml_tensor * apir_decode_ggml_tensor_inplace(apir_decoder * dec) { // it safe to remove the `const` qualifier here, we *do* want to // modify the shared memory data to fix the `src` pointers. 
ggml_tensor * tensor = (ggml_tensor *) (uintptr_t) apir_decoder_use_inplace(dec, sizeof(ggml_tensor)); diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h index e40d9e8cdce..f6817989528 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h @@ -41,12 +41,12 @@ void apir_add_tensor(ggml_tensor * tensor, std::vector & tensors, std::unordered_set & visited); -ggml_tensor * apir_deserialize_tensor(struct ggml_context * ctx, const apir_rpc_tensor * tensor); +ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor); ggml_tensor * apir_create_node(uint64_t id, - struct ggml_context * ctx, + ggml_context * ctx, const std::unordered_map & tensor_ptrs, - std::unordered_map & tensor_map); + std::unordered_map & tensor_map); ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes, uint32_t n_tensors, diff --git a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp index c6e0c522782..2b01ffd3e67 100644 --- a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp +++ b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp @@ -36,7 +36,7 @@ apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) { result.data = reinterpret_cast(tensor->data); if (tensor->data) { if (!tensor->buffer) { - FATAL("tensor has data but not buffer :/"); + GGML_ABORT("tensor has data but not buffer :/"); } // tensor->data is serialized as an offset to the buffer base address result.data -= reinterpret_cast(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base); diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp index 5e696ca49e7..73df9ea9165 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp @@ -2,12 
+2,12 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = BUFT_TO_GPU(buft); - struct ggml_backend_remoting_buffer_context * context = - (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + ggml_backend_remoting_buffer_context * context = + (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { - FATAL("Couldn't allocate the buffer context ..."); + GGML_ABORT("Couldn't allocate the buffer context ..."); } context->gpu = gpu; @@ -33,13 +33,13 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml } static const char * ggml_backend_remoting_buffer_type_get_name(ggml_backend_buffer_type_t buft) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = BUFT_TO_GPU(buft); return apir_buffer_type_get_name(gpu, buft); } static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = BUFT_TO_GPU(buft); static size_t align = 0; @@ -51,7 +51,7 @@ static size_t ggml_backend_remoting_buffer_type_get_alignment(ggml_backend_buffe } static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = BUFT_TO_GPU(buft); static size_t max_size = 0; if (max_size == 0) { @@ -62,14 +62,14 @@ static size_t ggml_backend_remoting_buffer_type_get_max_size(ggml_backend_buffer } static bool ggml_backend_remoting_buffer_type_is_host(ggml_backend_buffer_type_t buft) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = BUFT_TO_GPU(buft); return apir_buffer_type_is_host(gpu, buft); } static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { - struct virtgpu * gpu = BUFT_TO_GPU(buft); + virtgpu * gpu = 
BUFT_TO_GPU(buft); return apir_buffer_type_get_alloc_size(gpu, buft, tensor); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp index 217a81f878b..c7fdb241de8 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp @@ -1,17 +1,10 @@ #include "ggml-remoting.h" -#define BUFFER_TO_GPU(name) ((struct ggml_backend_remoting_buffer_context *) (name)->context)->gpu - -struct timer_data get_tensor_timer = { 0, 0, 0, "get_tensor" }; -struct timer_data set_tensor_timer = { 0, 0, 0, "set_tensor" }; -struct timer_data cpy_tensor_timer = { 0, 0, 0, "cpy_tensor" }; - -struct timer_data get_tensor_from_ptr_timer = { 0, 0, 0, "get_tensor_from_ptr" }; -struct timer_data set_tensor_from_ptr_timer = { 0, 0, 0, "set_tensor_from_ptr" }; +#define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) { - struct ggml_backend_remoting_buffer_context * context = - (struct ggml_backend_remoting_buffer_context *) buffer->context; + ggml_backend_remoting_buffer_context * context = + (ggml_backend_remoting_buffer_context *) buffer->context; if (context->base) { return context->base; } @@ -26,9 +19,9 @@ static void ggml_backend_remoting_buffer_set_tensor(ggml_backend_buffer_t buffer const void * data, size_t offset, size_t size) { - struct virtgpu * gpu = BUFFER_TO_GPU(buffer); + virtgpu * gpu = BUFFER_TO_GPU(buffer); - struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); + ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); if (context->is_from_ptr) { memcpy((char *) tensor->data + offset, data, size); } else { @@ -43,8 +36,8 @@ static void ggml_backend_remoting_buffer_get_tensor(ggml_backend_buffer_t buffer void * data, size_t offset, size_t size) { - struct virtgpu * gpu = 
BUFFER_TO_GPU(buffer); - struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); + virtgpu * gpu = BUFFER_TO_GPU(buffer); + ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); if (context->is_from_ptr) { memcpy(data, (const char *) tensor->data + offset, size); } else { @@ -77,7 +70,7 @@ static void ggml_backend_remoting_buffer_get_tensor_from_ptr(ggml_backend_buffer static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) { - struct virtgpu * gpu = BUFFER_TO_GPU(buffer); + virtgpu * gpu = BUFFER_TO_GPU(buffer); bool ret = apir_buffer_cpy_tensor(gpu, BUFFER_TO_APIR_CONTEXT(buffer), src, dst); @@ -85,7 +78,7 @@ static bool ggml_backend_remoting_buffer_cpy_tensor(ggml_backend_buffer_t buffer } static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { - struct virtgpu * gpu = BUFFER_TO_GPU(buffer); + virtgpu * gpu = BUFFER_TO_GPU(buffer); apir_buffer_clear(gpu, BUFFER_TO_APIR_CONTEXT(buffer), value); @@ -93,11 +86,11 @@ static void ggml_backend_remoting_buffer_clear(ggml_backend_buffer_t buffer, uin } static void ggml_backend_remoting_buffer_free_buffer(ggml_backend_buffer_t buffer) { - struct virtgpu * gpu = BUFFER_TO_GPU(buffer); + virtgpu * gpu = BUFFER_TO_GPU(buffer); apir_buffer_free_buffer(gpu, BUFFER_TO_APIR_CONTEXT(buffer)); - struct ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); + ggml_backend_remoting_buffer_context * context = BUFFER_TO_GGML_CONTEXT(buffer); free(context); buffer->context = NULL; } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp index b8440ceb202..b93d7d31664 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp @@ -1,19 +1,19 @@ #include "ggml-remoting.h" static const char * 
ggml_backend_remoting_device_get_name(ggml_backend_dev_t dev) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_name(gpu); } static const char * ggml_backend_remoting_device_get_description(ggml_backend_dev_t dev) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_description(gpu); } static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_backend_dev_t dev) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); static enum ggml_backend_dev_type type; static bool has_type = false; @@ -26,7 +26,7 @@ static enum ggml_backend_dev_type ggml_backend_remoting_device_get_type(ggml_bac } static void ggml_backend_remoting_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_get_memory(gpu, free, total); } @@ -40,7 +40,7 @@ static bool ggml_backend_remoting_device_supports_op(ggml_backend_dev_t dev, con return true; #else - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); return apir_device_supports_op(gpu, op); #endif @@ -59,13 +59,13 @@ static bool ggml_backend_remoting_device_offload_op(ggml_backend_dev_t dev, cons return false; } -static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { +static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) { props->name = ggml_backend_remoting_device_get_name(dev); props->description = ggml_backend_remoting_device_get_description(dev); props->type = ggml_backend_remoting_device_get_type(dev); ggml_backend_remoting_device_get_memory(dev, &props->memory_free, &props->memory_total); - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); apir_device_get_props(gpu, &props->caps.async, &props->caps.host_buffer, 
&props->caps.buffer_from_host_ptr, &props->caps.events); @@ -75,11 +75,11 @@ static void ggml_backend_remoting_device_get_props(ggml_backend_dev_t dev, struc } ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_backend_dev_t dev) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); - static struct ggml_backend_buffer_type buft{ + static ggml_backend_buffer_type buft{ /* .iface = */ ggml_backend_remoting_buffer_type_interface, /* .device = */ dev, /* .context = */ (void *) ctx, @@ -89,11 +89,11 @@ ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_type(ggml_bac } static ggml_backend_buffer_type_t ggml_backend_remoting_device_get_buffer_from_ptr_type(ggml_backend_dev_t dev) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); apir_buffer_type_host_handle_t ctx = apir_device_get_buffer_type(gpu); - static struct ggml_backend_buffer_type buft{ + static ggml_backend_buffer_type buft{ /* .iface = */ ggml_backend_remoting_buffer_from_ptr_type_interface, /* .device = */ dev, /* .context = */ (void *) ctx, @@ -106,12 +106,12 @@ static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_b void * ptr, size_t size, size_t max_tensor_size) { - struct virtgpu * gpu = DEV_TO_GPU(dev); + virtgpu * gpu = DEV_TO_GPU(dev); - struct ggml_backend_remoting_buffer_context * context = - (struct ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + ggml_backend_remoting_buffer_context * context = + (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { - FATAL("Couldn't allocate the buffer context ..."); + GGML_ABORT("Couldn't allocate the buffer context ..."); } context->gpu = gpu; @@ -126,7 +126,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_b return buffer; } -const struct ggml_backend_device_i 
ggml_backend_remoting_device_interface = { +const ggml_backend_device_i ggml_backend_remoting_device_interface = { /* .get_name = */ ggml_backend_remoting_device_get_name, /* .get_description = */ ggml_backend_remoting_device_get_description, /* .get_memory = */ ggml_backend_remoting_device_get_memory, diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index 663f26e20bd..75718bc1b14 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -3,8 +3,8 @@ #include #include -static struct virtgpu * apir_initialize() { - static struct virtgpu * apir_gpu_instance = NULL; +static virtgpu * apir_initialize() { + static virtgpu * apir_gpu_instance = NULL; static bool apir_initialized = false; if (apir_initialized) { @@ -13,7 +13,7 @@ static struct virtgpu * apir_initialize() { apir_gpu_instance = create_virtgpu(); if (!apir_gpu_instance) { - FATAL("failed to initialize the virtgpu :/"); + GGML_ABORT("failed to initialize the virtgpu :/"); } apir_initialized = true; @@ -22,9 +22,9 @@ static struct virtgpu * apir_initialize() { } static int ggml_backend_remoting_get_device_count() { - struct virtgpu * gpu = apir_initialize(); + virtgpu * gpu = apir_initialize(); if (!gpu) { - WARNING("apir_initialize failed :/"); + GGML_LOG_WARN("apir_initialize failed :/"); return 0; } @@ -46,13 +46,13 @@ ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) { static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { if (devices.size() > 0) { - INFO("%s: already initialized", __func__); + GGML_LOG_INFO("%s: already initialized", __func__); return; } - struct virtgpu * gpu = apir_initialize(); + virtgpu * gpu = apir_initialize(); if (!gpu) { - FATAL("apir_initialize failed :/"); + GGML_LOG_ERROR("apir_initialize failed :/"); return; } @@ -95,33 +95,17 @@ static const char * ggml_backend_remoting_reg_get_name(ggml_backend_reg_t 
reg) { return GGML_REMOTING_FRONTEND_NAME; } -static const struct ggml_backend_reg_i ggml_backend_remoting_reg_i = { +static const ggml_backend_reg_i ggml_backend_remoting_reg_i = { /* .get_name = */ ggml_backend_remoting_reg_get_name, /* .get_device_count = */ ggml_backend_remoting_reg_get_device_count, /* .get_device = */ ggml_backend_remoting_reg_get_device, /* .get_proc_address = */ NULL, }; -static void showTime() { - show_timer(&graph_compute_timer); - show_timer(&get_tensor_timer); - show_timer(&set_tensor_timer); - show_timer(&wait_host_reply_timer); - - if (get_tensor_from_ptr_timer.count) { - show_timer(&get_tensor_from_ptr_timer); - show_timer(&set_tensor_from_ptr_timer); - } - - if (cpy_tensor_timer.count) { - show_timer(&cpy_tensor_timer); - } -} - ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { - struct virtgpu * gpu = apir_initialize(); + virtgpu * gpu = apir_initialize(); if (!gpu) { - FATAL("apir_initialize failed :/"); + GGML_LOG_ERROR("apir_initialize failed :/"); return NULL; } @@ -139,10 +123,7 @@ ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { ggml_backend_remoting_reg_init_devices(&reg); - int cr = atexit(showTime); - GGML_ASSERT(cr == 0); - - MESSAGE("%s: initialzed", __func__); + GGML_LOG_INFO("%s: initialzed", __func__); return &reg; } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp index e400be2af0d..a0f1f7ec792 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend.cpp @@ -10,33 +10,21 @@ static void ggml_backend_remoting_free(ggml_backend_t backend) { delete backend; } -struct timer_data graph_compute_timer = { 0, 0, 0, "compute_timer" }; - static ggml_status ggml_backend_remoting_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { - struct virtgpu * gpu = DEV_TO_GPU(backend->device); - - start_timer(&graph_compute_timer); - - ggml_status status = apir_backend_graph_compute(gpu, cgraph); - -
stop_timer(&graph_compute_timer); + virtgpu * gpu = DEV_TO_GPU(backend->device); - return status; + return apir_backend_graph_compute(gpu, cgraph); } static void ggml_backend_remoting_graph_optimize(ggml_backend_t backend, ggml_cgraph * cgraph) { - struct virtgpu * gpu = DEV_TO_GPU(backend->device); + virtgpu * gpu = DEV_TO_GPU(backend->device); #if true UNUSED(gpu); UNUSED(cgraph); - - // not working yet #else - start_timer(&graph_compute_timer); + // not working yet apir_backend_graph_optimize(gpu, cgraph); - - stop_timer(&graph_compute_timer); #endif } diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-remotingfrontend/ggml-remoting.h index 0683fec8424..3027fafdfba 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting.h +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting.h @@ -14,34 +14,34 @@ #define USE_ALWAYS_TRUE_SUPPORTS_OP 1 #define USE_METAL_GUEST_SUPPORTS_OP 0 -#define DEV_TO_GPU(name) ((struct ggml_backend_remoting_device_context *) (name)->context)->gpu +#define DEV_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->context)->gpu -#define BUFFER_TO_GGML_CONTEXT(name) ((struct ggml_backend_remoting_buffer_context *) (name)->context) +#define BUFFER_TO_GGML_CONTEXT(name) ((ggml_backend_remoting_buffer_context *) (name)->context) -#define BUFFER_TO_APIR_CONTEXT(name) &((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context +#define BUFFER_TO_APIR_CONTEXT(name) &((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context #define BUFFER_TO_HOST_HANDLE(name) \ - ((struct ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle + ((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle #define GET_DEVICE_CONTEXT() \ - (struct ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context + (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context -#define BUFT_TO_GPU(name) ((struct 
ggml_backend_remoting_device_context *) (name)->device->context)->gpu +#define BUFT_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->device->context)->gpu struct ggml_backend_remoting_device_context { size_t device; std::string name; std::string description; - std::vector> shared_memory; + std::vector> shared_memory; - struct virtgpu * gpu; + virtgpu * gpu; }; struct ggml_backend_remoting_buffer_context { apir_buffer_context_t apir_context; - struct virtgpu * gpu; + virtgpu * gpu; void * base; @@ -49,7 +49,7 @@ struct ggml_backend_remoting_buffer_context { }; extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface; -extern const struct ggml_backend_device_i ggml_backend_remoting_device_interface; +extern const ggml_backend_device_i ggml_backend_remoting_device_interface; extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface; extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface; extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface; diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h index bad4b1e3ffb..7f16844bf7b 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-apir.h @@ -5,11 +5,11 @@ #include "virtgpu-shm.h" #include "virtgpu-utils.h" -typedef struct { +struct apir_buffer_context_t { apir_buffer_host_handle_t host_handle; struct virtgpu_shmem shmem; apir_buffer_type_host_handle_t buft_host_handle; -} apir_buffer_context_t; +}; #include "virtgpu-forward.gen.h" diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp index 2bcf91d3916..39038221a2e 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp @@ -1,14 +1,14 @@ #include "virtgpu-forward-impl.h" static long long 
current_time_ms() { - struct timespec ts; + timespec ts; clock_gettime(CLOCK_REALTIME, &ts); // Use CLOCK_MONOTONIC for elapsed time return (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; } -ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE); @@ -16,14 +16,14 @@ ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgrap std::vector cgraph_data; size_t cgraph_size = apir_serialize_ggml_cgraph(cgraph, cgraph_data); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem * shmem = &temp_shmem; + virtgpu_shmem temp_shmem; // Local storage for large buffers + virtgpu_shmem * shmem = &temp_shmem; if (cgraph_size <= gpu->data_shmem.mmap_size) { // prefer the init-time allocated page, if large enough shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); @@ -31,7 +31,7 @@ ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgrap apir_encode_size_t(encoder, &cgraph_size); char * shmem_data = (char *) shmem->mmap_ptr; - struct apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size); + apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size); apir_encode_cgraph_data(&secondary_enc, cgraph_data); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp index c8195e3ef75..a5c4302ce4b 100644 --- 
a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -1,8 +1,8 @@ #include "virtgpu-forward-impl.h" -const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); @@ -14,7 +14,7 @@ const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - ERROR("%s: Could not allocate the device name buffer", __func__); + GGML_LOG_ERROR("%s: Could not allocate the device name buffer", __func__); apir_decoder_set_fatal(decoder); } apir_decode_char_array(decoder, string, string_size); @@ -24,9 +24,9 @@ const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer return string; } -size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); @@ -43,9 +43,9 @@ size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_ return alignment; } -size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) { + apir_encoder * 
encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); @@ -62,9 +62,9 @@ size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_t return max_size; } -bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); @@ -81,11 +81,11 @@ bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t b return is_host; } -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; apir_buffer_context_t buffer_context; @@ -105,9 +105,9 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * g return buffer_context; } -size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp index e1d79331a14..472da7f61c3 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp +++ 
b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp @@ -1,8 +1,8 @@ #include "virtgpu-forward-impl.h" -void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_BASE); @@ -19,14 +19,14 @@ void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer return (void *) base; } -void apir_buffer_set_tensor(struct virtgpu * gpu, +void apir_buffer_set_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR); @@ -34,15 +34,15 @@ void apir_buffer_set_tensor(struct virtgpu * gpu, apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); apir_encode_ggml_tensor(encoder, tensor); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem * shmem = &temp_shmem; + virtgpu_shmem temp_shmem; // Local storage for large buffers + virtgpu_shmem * shmem = &temp_shmem; if (size <= gpu->data_shmem.mmap_size) { // prefer the init-time allocated page, if large enough shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); } memcpy(shmem->mmap_ptr, data, size); @@ -64,7 +64,7 @@ void apir_buffer_set_tensor(struct virtgpu * gpu, #if false void -apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_context, 
+apir_buffer_get_tensor(virtgpu *gpu, apir_buffer_context_t *buffer_context, const ggml_tensor *tensor, void *data, size_t offset, size_t size) { UNUSED(gpu); UNUSED(tensor); @@ -73,14 +73,14 @@ apir_buffer_get_tensor(struct virtgpu *gpu, apir_buffer_context_t *buffer_contex memcpy(data, buffer_base_addr+offset, size); } #else -void apir_buffer_get_tensor(struct virtgpu * gpu, +void apir_buffer_get_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR); @@ -88,15 +88,15 @@ void apir_buffer_get_tensor(struct virtgpu * gpu, apir_encode_apir_buffer_host_handle_t(encoder, &buffer_context->host_handle); apir_encode_ggml_tensor(encoder, tensor); - struct virtgpu_shmem temp_shmem; // Local storage for large buffers - struct virtgpu_shmem * shmem = &temp_shmem; + virtgpu_shmem temp_shmem; // Local storage for large buffers + virtgpu_shmem * shmem = &temp_shmem; if (size <= gpu->data_shmem.mmap_size) { // prefer the init-time allocated page, if large enough shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); @@ -115,12 +115,12 @@ void apir_buffer_get_tensor(struct virtgpu * gpu, } #endif -bool apir_buffer_cpy_tensor(struct virtgpu * gpu, +bool apir_buffer_cpy_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * src, const ggml_tensor * dst) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, 
APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR); @@ -139,9 +139,9 @@ bool apir_buffer_cpy_tensor(struct virtgpu * gpu, return ret_val; } -void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR); @@ -154,9 +154,9 @@ void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_cont remote_call_finish(gpu, encoder, decoder); } -void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp index e01893ce28a..c88a638227e 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -1,14 +1,14 @@ #include "virtgpu-forward-impl.h" #include "virtgpu-shm.h" -int apir_device_get_count(struct virtgpu * gpu) { +int apir_device_get_count(virtgpu * gpu) { static int32_t dev_count = -1; if (dev_count != -1) { return dev_count; } - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); @@ -21,13 +21,13 @@ int apir_device_get_count(struct virtgpu * gpu) { return dev_count; } -const char * apir_device_get_name(struct virtgpu * gpu) { +const 
char * apir_device_get_name(virtgpu * gpu) { static char * string = nullptr; if (string) { return string; } - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME); @@ -36,8 +36,8 @@ const char * apir_device_get_name(struct virtgpu * gpu) { const size_t string_size = apir_decode_array_size_unchecked(decoder); string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - ERROR("%s: Could not allocate the device name buffer", __func__); - apir_decoder_set_fatal(decoder); + GGML_LOG_ERROR("%s: Could not allocate the device name buffer", __func__); + return NULL; } apir_decode_char_array(decoder, string, string_size); @@ -46,9 +46,9 @@ const char * apir_device_get_name(struct virtgpu * gpu) { return string; } -const char * apir_device_get_description(struct virtgpu * gpu) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +const char * apir_device_get_description(virtgpu * gpu) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION); @@ -58,8 +58,7 @@ const char * apir_device_get_description(struct virtgpu * gpu) { const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - ERROR("%s: Could not allocate the device description buffer", __func__); - apir_decoder_set_fatal(decoder); + GGML_LOG_ERROR("%s: Could not allocate the device description buffer", __func__); return NULL; } @@ -70,14 +69,14 @@ const char * apir_device_get_description(struct virtgpu * gpu) { return string; } -uint32_t apir_device_get_type(struct virtgpu * gpu) { +uint32_t apir_device_get_type(virtgpu * gpu) { static uint32_t dev_type = 255; if (dev_type != 255) { return dev_type; } - struct 
apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE); @@ -91,11 +90,11 @@ uint32_t apir_device_get_type(struct virtgpu * gpu) { return dev_type; } -void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total) { +void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) { static size_t dev_free = 0; static size_t dev_total = 0; - struct apir_encoder * encoder; - struct apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY); @@ -113,9 +112,9 @@ void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total) return; } -bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP); @@ -132,9 +131,9 @@ bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op) { return supports_op; } -apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE); @@ -149,13 +148,13 @@ apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu) return buft_handle; } -void apir_device_get_props(struct virtgpu * gpu, - bool * async, - bool * host_buffer, - bool * buffer_from_host_ptr, - bool * events) { - struct apir_encoder 
* encoder; - struct apir_decoder * decoder; +void apir_device_get_props(virtgpu * gpu, + bool * async, + bool * host_buffer, + bool * buffer_from_host_ptr, + bool * events) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS); @@ -172,9 +171,9 @@ void apir_device_get_props(struct virtgpu * gpu, return; } -apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; apir_buffer_context_t buffer_context; @@ -182,7 +181,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t s REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR); if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) { - FATAL("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); } apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h index 237cb3890ec..b1e00841966 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h @@ -6,24 +6,24 @@ #include "ggml-backend-impl.h" -#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ - do { \ - int32_t forward_flag = (int32_t) apir_command_type__; \ +#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ + do { \ + int32_t forward_flag = (int32_t) apir_command_type__; \ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ - if (!encoder_name) { \ - 
FATAL("%s: failed to prepare the remote call encoder :/", __func__); \ - } \ + if (!encoder_name) { \ + GGML_ABORT("%s: failed to prepare the remote call encoder :/", __func__); \ + } \ } while (0) -#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ - do { \ - ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ - if (!decoder_name) { \ - FATAL("%s: failed to kick the remote call :/", __func__); \ - } \ - if (ret_name < APIR_FORWARD_BASE_INDEX) { \ - FATAL("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), \ - ret_name); \ - } \ - ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ +#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ + do { \ + ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ + if (!decoder_name) { \ + GGML_ABORT("%s: failed to kick the remote call :/", __func__); \ + } \ + if (ret_name < APIR_FORWARD_BASE_INDEX) { \ + GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), \ + ret_name); \ + } \ + ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ } while (0) diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h index c27c07f0865..f02d586a342 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h @@ -1,51 +1,51 @@ #pragma once /* device */ -void apir_device_get_device_count(struct virtgpu * gpu); -int apir_device_get_count(struct virtgpu * gpu); -const char * apir_device_get_name(struct virtgpu * gpu); -const char * apir_device_get_description(struct virtgpu * gpu); -uint32_t apir_device_get_type(struct virtgpu * gpu); -void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); -bool apir_device_supports_op(struct 
virtgpu * gpu, const ggml_tensor * op); -apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu); -void apir_device_get_props(struct virtgpu * gpu, +void apir_device_get_device_count(virtgpu * gpu); +int apir_device_get_count(virtgpu * gpu); +const char * apir_device_get_name(virtgpu * gpu); +const char * apir_device_get_description(virtgpu * gpu); +uint32_t apir_device_get_type(virtgpu * gpu); +void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total); +bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op); +apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu); +void apir_device_get_props(virtgpu * gpu, bool * async, bool * host_buffer, bool * buffer_from_host_ptr, bool * events); -apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); +apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, +const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft); +bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft); +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buffer_buft, size_t size); -size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, 
ggml_backend_buffer_type_t buft, const ggml_tensor * op); +size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); /* buffer */ -void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); -void apir_buffer_set_tensor(struct virtgpu * gpu, +void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context); +void apir_buffer_set_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, ggml_tensor * tensor, const void * data, size_t offset, size_t size); -void apir_buffer_get_tensor(struct virtgpu * gpu, +void apir_buffer_get_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * tensor, void * data, size_t offset, size_t size); -bool apir_buffer_cpy_tensor(struct virtgpu * gpu, +bool apir_buffer_cpy_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * src, const ggml_tensor * dst); -void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value); -void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); +void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value); +void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context); /* backend */ -ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph); +ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp index 22d9b668002..28ef1dd4595 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp @@ -4,17 +4,17 @@ #include -static uint32_t virtgpu_ioctl_resource_create_blob(struct virtgpu * gpu, - uint32_t blob_mem, - uint32_t blob_flags, - size_t blob_size, - uint64_t blob_id, - uint32_t * res_id) { +static uint32_t 
virtgpu_ioctl_resource_create_blob(virtgpu * gpu, + uint32_t blob_mem, + uint32_t blob_flags, + size_t blob_size, + uint64_t blob_id, + uint32_t * res_id) { #ifdef SIMULATE_BO_SIZE_FIX blob_size = align64(blob_size, 4096); #endif - struct drm_virtgpu_resource_create_blob args = { + drm_virtgpu_resource_create_blob args = { .blob_mem = blob_mem, .blob_flags = blob_flags, .bo_handle = 0, @@ -34,8 +34,8 @@ static uint32_t virtgpu_ioctl_resource_create_blob(struct virtgpu * gpu, return args.bo_handle; } -static void virtgpu_ioctl_gem_close(struct virtgpu * gpu, uint32_t gem_handle) { - struct drm_gem_close args = { +static void virtgpu_ioctl_gem_close(virtgpu * gpu, uint32_t gem_handle) { + drm_gem_close args = { .handle = gem_handle, .pad = 0, }; @@ -47,8 +47,8 @@ static void virtgpu_ioctl_gem_close(struct virtgpu * gpu, uint32_t gem_handle) { #endif } -static void * virtgpu_ioctl_map(struct virtgpu * gpu, uint32_t gem_handle, size_t size) { - struct drm_virtgpu_map args = { +static void * virtgpu_ioctl_map(virtgpu * gpu, uint32_t gem_handle, size_t size) { + drm_virtgpu_map args = { .offset = 0, .handle = gem_handle, .pad = 0, @@ -66,12 +66,12 @@ static void * virtgpu_ioctl_map(struct virtgpu * gpu, uint32_t gem_handle, size_ return ptr; } -void virtgpu_shmem_destroy(struct virtgpu * gpu, struct virtgpu_shmem * shmem) { +void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem) { munmap(shmem->mmap_ptr, shmem->mmap_size); virtgpu_ioctl_gem_close(gpu, shmem->gem_handle); } -int virtgpu_shmem_create(struct virtgpu * gpu, size_t size, struct virtgpu_shmem * shmem) { +int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) { size = align64(size, 16384); uint32_t res_id; diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h index bc890c6717c..606860a0946 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.h @@ -19,5 +19,5 @@ struct 
virtgpu_shmem { uint32_t gem_handle; }; -int virtgpu_shmem_create(struct virtgpu * gpu, size_t size, struct virtgpu_shmem * shmem); -void virtgpu_shmem_destroy(struct virtgpu * gpu, struct virtgpu_shmem * shmem); +int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem); +void virtgpu_shmem_destroy(virtgpu * gpu, virtgpu_shmem * shmem); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp index 80046fe2688..8a2805e9902 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp @@ -46,7 +46,7 @@ static inline uint64_t util_logbase2_64(uint64_t n) { #endif } -void util_sparse_array_init(struct util_sparse_array * arr, size_t elem_size, size_t node_size) { +void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size) { memset(arr, 0, sizeof(*arr)); arr->elem_size = elem_size; arr->node_size_log2 = util_logbase2_64(node_size); @@ -70,7 +70,7 @@ static inline unsigned _util_sparse_array_node_level(uintptr_t handle) { return handle & NODE_LEVEL_MASK; } -static inline void _util_sparse_array_node_finish(struct util_sparse_array * arr, uintptr_t node) { +static inline void _util_sparse_array_node_finish(util_sparse_array * arr, uintptr_t node) { if (_util_sparse_array_node_level(node) > 0) { uintptr_t * children = (uintptr_t *) _util_sparse_array_node_data(node); size_t node_size = 1ull << arr->node_size_log2; @@ -91,7 +91,7 @@ static inline uintptr_t _util_sparse_array_node(void * data, unsigned level) { return (uintptr_t) data | level; } -inline uintptr_t _util_sparse_array_node_alloc(struct util_sparse_array * arr, unsigned level) { +inline uintptr_t _util_sparse_array_node_alloc(util_sparse_array * arr, unsigned level) { size_t size; if (level == 0) { size = arr->elem_size << arr->node_size_log2; @@ -119,7 +119,7 @@ static inline uintptr_t _util_sparse_array_set_or_free_node(uintptr_t * node_ptr } } -void * 
util_sparse_array_get(struct util_sparse_array * arr, uint64_t idx) { +void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx) { const unsigned node_size_log2 = arr->node_size_log2; uintptr_t root = p_atomic_read(&arr->root); if (unlikely(!root)) { diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h index c2bcd9589f6..e6e8ff73d8e 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h @@ -28,65 +28,6 @@ #define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE) -void thks_bye(); -void breakpoint(); - -#ifndef NDEBUG -inline void INFO(const char * format, ...) { - fprintf(stderr, "INFO: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); -} -#else -inline void INFO(...) {} -#endif - -inline void MESSAGE(const char * format, ...) { - fprintf(stderr, "APIR: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); -} - -inline void WARNING(const char * format, ...) { - fprintf(stderr, "WARNING: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); -} - -inline void ERROR(const char * format, ...) { - fprintf(stderr, "ERROR: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); -} - -inline void FATAL(const char * format, ...) 
{ - fprintf(stderr, "FATAL: "); - - va_list argptr; - va_start(argptr, format); - vfprintf(stderr, format, argptr); - fprintf(stderr, "\n"); - va_end(argptr); - - abort(); -} - static inline bool util_is_power_of_two_nonzero64(uint64_t v) { return IS_POT_NONZERO(v); } @@ -97,8 +38,8 @@ static inline uint64_t align64(uint64_t value, uint64_t alignment) { } struct list_head { - struct list_head * prev; - struct list_head * next; + list_head * prev; + list_head * next; }; struct util_sparse_array { @@ -108,13 +49,38 @@ struct util_sparse_array { uintptr_t root; }; -void * util_sparse_array_get(struct util_sparse_array * arr, uint64_t idx); -void util_sparse_array_init(struct util_sparse_array * arr, size_t elem_size, size_t node_size); +void * util_sparse_array_get(util_sparse_array * arr, uint64_t idx); +void util_sparse_array_init(util_sparse_array * arr, size_t elem_size, size_t node_size); inline void os_time_sleep(int64_t usecs) { - struct timespec time; + timespec time; time.tv_sec = usecs / 1000000; time.tv_nsec = (usecs % 1000000) * 1000; while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR) ; } + +struct timer_data { + long long start; + long long total; + long long count; +}; + +static inline void start_timer(timer_data * timer) { + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + timer->start = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +// returns the duration in ns +static inline long long stop_timer(timer_data * timer) { + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + long long timer_end = (long long) ts.tv_sec * 1000000000LL + ts.tv_nsec; + + long long duration = (timer_end - timer->start); + timer->total += duration; + timer->count += 1; + + return duration; +} diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index f43cd6fdd5f..1149fa72f4a 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -7,35 +7,33 @@ 
#include #include -static virt_gpu_result_t virtgpu_open_device(struct virtgpu * gpu, const drmDevicePtr dev); -static virt_gpu_result_t virtgpu_open(struct virtgpu * gpu); +static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev); +static virt_gpu_result_t virtgpu_open(virtgpu * gpu); -static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu); -static virt_gpu_result_t virtgpu_init_context(struct virtgpu * gpu); +static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu); +static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu); -static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_capset capset_id); -static int virtgpu_ioctl_get_caps(struct virtgpu * gpu, - enum virgl_renderer_capset id, - uint32_t version, - void * capset, - size_t capset_size); -static uint64_t virtgpu_ioctl_getparam(struct virtgpu * gpu, uint64_t param); -static void virtgpu_init_renderer_info(struct virtgpu * gpu); - -struct timer_data wait_host_reply_timer = { 0, 0, 0, "wait_host_reply" }; +static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id); +static int virtgpu_ioctl_get_caps(virtgpu * gpu, + virgl_renderer_capset id, + uint32_t version, + void * capset, + size_t capset_size); +static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param); +static void virtgpu_init_renderer_info(virtgpu * gpu); static void log_call_duration(long long call_duration_ns, const char * name); const uint64_t APIR_HANDSHAKE_MAX_WAIT_MS = 2 * 1000; // 2s const uint64_t APIR_LOADLIBRARY_MAX_WAIT_MS = 60 * 1000; // 60s -static int virtgpu_handshake(struct virtgpu * gpu) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +static int virtgpu_handshake(virtgpu * gpu) { + apir_encoder * encoder; + apir_decoder * decoder; encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); if (!encoder) { - FATAL("%s: failed to prepare the remote call encoder :/", __func__); + GGML_ABORT("%s: 
failed to prepare the remote call encoder :/", __func__); return 1; } @@ -54,7 +52,7 @@ static int virtgpu_handshake(struct virtgpu * gpu) { log_call_duration(call_duration_ns, "API Remoting handshake"); if (!decoder) { - FATAL( + GGML_ABORT( "%s: failed to initiate the communication with the virglrenderer library. " "Most likely, the wrong virglrenderer library was loaded in the hypervisor.", __func__); @@ -67,8 +65,8 @@ static int virtgpu_handshake(struct virtgpu * gpu) { uint32_t host_minor; if (ret_magic != APIR_HANDSHAKE_MAGIC) { - FATAL("%s: handshake with the virglrenderer failed (code=%d | %s):/", __func__, ret_magic, - apir_backend_initialize_error(ret_magic)); + GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s):/", __func__, ret_magic, + apir_backend_initialize_error(ret_magic)); } else { apir_decode_uint32_t(decoder, &host_major); apir_decode_uint32_t(decoder, &host_minor); @@ -80,26 +78,26 @@ static int virtgpu_handshake(struct virtgpu * gpu) { return 1; } - INFO("%s: Guest is running with %u.%u", __func__, guest_major, guest_minor); - INFO("%s: Host is running with %u.%u", __func__, host_major, host_minor); + GGML_LOG_INFO("%s: Guest is running with %u.%u", __func__, guest_major, guest_minor); + GGML_LOG_INFO("%s: Host is running with %u.%u", __func__, host_major, host_minor); if (guest_major != host_major) { - ERROR("Host major (%d) and guest major (%d) version differ", host_major, guest_major); + GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ", host_major, guest_major); } else if (guest_minor != host_minor) { - WARNING("Host minor (%d) and guest minor (%d) version differ", host_minor, guest_minor); + GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ", host_minor, guest_minor); } return 0; } -static ApirLoadLibraryReturnCode virtgpu_load_library(struct virtgpu * gpu) { - struct apir_encoder * encoder; - struct apir_decoder * decoder; +static ApirLoadLibraryReturnCode 
virtgpu_load_library(virtgpu * gpu) { + apir_encoder * encoder; + apir_decoder * decoder; ApirLoadLibraryReturnCode ret; encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); if (!encoder) { - FATAL("%s: hypercall error: failed to prepare the remote call encoder :/", __func__); + GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder :/", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } @@ -110,14 +108,14 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(struct virtgpu * gpu) { log_call_duration(call_duration_ns, "API Remoting LoadLibrary"); if (!decoder) { - FATAL("%s: hypercall error: failed to kick the API remoting hypercall. :/", __func__); + GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall. :/", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } remote_call_finish(gpu, encoder, decoder); if (ret == APIR_LOAD_LIBRARY_SUCCESS) { - INFO("%s: The API Remoting backend was successfully loaded and initialized", __func__); + GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized", __func__); return ret; } @@ -125,76 +123,76 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(struct virtgpu * gpu) { // something wrong happened, find out what. 
if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - FATAL("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__, + GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__, apir_load_library_error(ret), ret); return ret; } - INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__); + GGML_LOG_INFO("%s: virglrenderer successfully loaded the API Remoting backend library", __func__); ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - FATAL("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", __func__, + GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", __func__, apir_ret, apir_load_library_error(apir_ret)); } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; - FATAL("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", __func__, + GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", __func__, lib_ret); } return ret; } -struct virtgpu * create_virtgpu() { - struct virtgpu * gpu = new struct virtgpu(); +virtgpu * create_virtgpu() { + virtgpu * gpu = new virtgpu(); gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; - util_sparse_array_init(&gpu->shmem_array, sizeof(struct virtgpu_shmem), 1024); + util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024); if (virtgpu_open(gpu) != APIR_SUCCESS) { - FATAL("%s: failed to open the virtgpu device :/", __func__); + GGML_ABORT("%s: failed to open the virtgpu device :/", __func__); return NULL; } if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { - FATAL("%s: failed to initialize the GPU capset :/", __func__); + GGML_ABORT("%s: failed to initialize the GPU capset :/", __func__); 
return NULL; } if (virtgpu_init_context(gpu) != APIR_SUCCESS) { - FATAL("%s: failed to initialize the GPU context :/", __func__); + GGML_ABORT("%s: failed to initialize the GPU context :/", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) { - FATAL("%s: failed to create the shared reply memory pages :/", __func__); + GGML_ABORT("%s: failed to create the shared reply memory pages :/", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) { - FATAL("%s: failed to create the shared data memory pages :/", __func__); + GGML_ABORT("%s: failed to create the shared data memory pages :/", __func__); return NULL; } if (virtgpu_handshake(gpu)) { - FATAL("%s: failed to handshake with the virglrenderer library :/", __func__); + GGML_ABORT("%s: failed to handshake with the virglrenderer library :/", __func__); return NULL; } if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) { - FATAL("%s: failed to load the backend library :/", __func__); + GGML_ABORT("%s: failed to load the backend library :/", __func__); return NULL; } return gpu; } -static virt_gpu_result_t virtgpu_open(struct virtgpu * gpu) { +static virt_gpu_result_t virtgpu_open(virtgpu * gpu) { drmDevicePtr devs[8]; int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); if (count < 0) { - ERROR("%s: failed to enumerate DRM devices", __func__); + GGML_LOG_ERROR("%s: failed to enumerate DRM devices", __func__); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -211,21 +209,21 @@ static virt_gpu_result_t virtgpu_open(struct virtgpu * gpu) { return result; } -static virt_gpu_result_t virtgpu_open_device(struct virtgpu * gpu, const drmDevicePtr dev) { +static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr dev) { const char * node_path = dev->nodes[DRM_NODE_RENDER]; int fd = open(node_path, O_RDWR | O_CLOEXEC); if (fd < 0) { - MESSAGE("failed to open %s", node_path); + GGML_ABORT("failed to open %s", node_path); 
return APIR_ERROR_INITIALIZATION_FAILED; } drmVersionPtr version = drmGetVersion(fd); if (!version || strcmp(version->name, "virtio_gpu") || version->version_major != 0) { if (version) { - MESSAGE("unknown DRM driver %s version %d", version->name, version->version_major); + GGML_ABORT("unknown DRM driver %s version %d", version->name, version->version_major); } else { - MESSAGE("failed to get DRM driver version"); + GGML_ABORT("failed to get DRM driver version"); } if (version) { @@ -239,28 +237,28 @@ static virt_gpu_result_t virtgpu_open_device(struct virtgpu * gpu, const drmDevi drmFreeVersion(version); - MESSAGE("using DRM device %s", node_path); + GGML_LOG_INFO("using DRM device %s", node_path); return APIR_SUCCESS; } -static virt_gpu_result_t virtgpu_init_context(struct virtgpu * gpu) { +static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) { assert(!gpu->capset.version); const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id); if (ret) { - MESSAGE("failed to initialize context: %s", strerror(errno)); + GGML_LOG_INFO("failed to initialize context: %s", strerror(errno)); return APIR_ERROR_INITIALIZATION_FAILED; } return APIR_SUCCESS; } -static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu) { +static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) { if (gpu->use_apir_capset) { - MESSAGE("Using the APIR capset"); + GGML_LOG_INFO("Using the APIR capset"); gpu->capset.id = VIRGL_RENDERER_CAPSET_APIR; } else { - MESSAGE("Using the Venus capset"); + GGML_LOG_INFO("Using the Venus capset"); gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; } gpu->capset.version = 0; @@ -269,7 +267,7 @@ static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu) { virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data)); if (ret) { - MESSAGE("failed to get APIR v%d capset: %s", gpu->capset.version, strerror(errno)); + GGML_LOG_INFO("failed to get APIR v%d capset: %s", gpu->capset.version, 
strerror(errno)); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -278,8 +276,8 @@ static virt_gpu_result_t virtgpu_init_capset(struct virtgpu * gpu) { return APIR_SUCCESS; } -static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_capset capset_id) { - struct drm_virtgpu_context_set_param ctx_set_params[3] = { +static int virtgpu_ioctl_context_init(virtgpu * gpu, virgl_renderer_capset capset_id) { + drm_virtgpu_context_set_param ctx_set_params[3] = { { .param = VIRTGPU_CONTEXT_PARAM_CAPSET_ID, .value = capset_id, @@ -294,7 +292,7 @@ static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_ }, }; - struct drm_virtgpu_context_init args = { + drm_virtgpu_context_init args = { .num_params = ARRAY_SIZE(ctx_set_params), .pad = 0, .ctx_set_params = (uintptr_t) &ctx_set_params, @@ -303,12 +301,12 @@ static int virtgpu_ioctl_context_init(struct virtgpu * gpu, enum virgl_renderer_ return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_CONTEXT_INIT, &args); } -static int virtgpu_ioctl_get_caps(struct virtgpu * gpu, - enum virgl_renderer_capset id, - uint32_t version, - void * capset, - size_t capset_size) { - struct drm_virtgpu_get_caps args = { +static int virtgpu_ioctl_get_caps(virtgpu * gpu, + virgl_renderer_capset id, + uint32_t version, + void * capset, + size_t capset_size) { + drm_virtgpu_get_caps args = { .cap_set_id = id, .cap_set_ver = version, .addr = (uintptr_t) capset, @@ -319,10 +317,10 @@ static int virtgpu_ioctl_get_caps(struct virtgpu * gpu, return virtgpu_ioctl(gpu, DRM_IOCTL_VIRTGPU_GET_CAPS, &args); } -static uint64_t virtgpu_ioctl_getparam(struct virtgpu * gpu, uint64_t param) { +static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param) { /* val must be zeroed because kernel only writes the lower 32 bits */ uint64_t val = 0; - struct drm_virtgpu_getparam args = { + drm_virtgpu_getparam args = { .param = param, .value = (uintptr_t) &val, }; @@ -331,14 +329,14 @@ static uint64_t virtgpu_ioctl_getparam(struct 
virtgpu * gpu, uint64_t param) { return ret ? 0 : val; } -struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) { +apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags) { /* * Prepare the command encoder and its buffer */ static char encoder_buffer[4096]; - static struct apir_encoder enc; + static apir_encoder enc; enc = { .cur = encoder_buffer, .start = encoder_buffer, @@ -368,31 +366,31 @@ struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType return &enc; } -void remote_call_finish(struct virtgpu * gpu, struct apir_encoder * enc, struct apir_decoder * dec) { +void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) { UNUSED(gpu); if (!enc) { - ERROR("Invalid (null) encoder :/"); + GGML_LOG_ERROR("Invalid (null) encoder :/"); } if (!dec) { - ERROR("Invalid (null) decoder :/"); + GGML_LOG_ERROR("Invalid (null) decoder :/"); } if (apir_encoder_get_fatal(enc)) { - ERROR("Failed to encode the output parameters."); + GGML_LOG_ERROR("Failed to encode the output parameters."); } if (apir_decoder_get_fatal(dec)) { - ERROR("Failed to decode the input parameters."); + GGML_LOG_ERROR("Failed to decode the input parameters."); } } -uint32_t remote_call(struct virtgpu * gpu, - struct apir_encoder * encoder, - struct apir_decoder ** decoder, - float max_wait_ms, - long long * call_duration_ns) { +uint32_t remote_call(virtgpu * gpu, + apir_encoder * encoder, + apir_decoder ** decoder, + float max_wait_ms, + long long * call_duration_ns) { /* * Prepare the reply notification pointer */ @@ -404,7 +402,7 @@ uint32_t remote_call(struct virtgpu * gpu, * Trigger the execbuf ioctl */ - struct drm_virtgpu_execbuffer args = { + drm_virtgpu_execbuffer args = { .flags = VIRTGPU_EXECBUF_RING_IDX, .size = (uint32_t) (encoder->cur - encoder->start), .command = (uintptr_t) encoder->start, @@ -426,16 +424,17 @@ uint32_t remote_call(struct 
virtgpu * gpu, int ret = drmIoctl(gpu->fd, DRM_IOCTL_VIRTGPU_EXECBUFFER, &args); if (ret != 0) { - FATAL("%s: the virtgpu EXECBUFFER ioctl failed (%d) :/ \n", ret); + GGML_ABORT("%s: the virtgpu EXECBUFFER ioctl failed (%d)", __func__, ret); } /* * Wait for the response notification */ + timer_data wait_host_reply_timer = { 0, 0, 0 }; start_timer(&wait_host_reply_timer); - struct timespec ts_start, ts_end; + timespec ts_start, ts_end; clock_gettime(CLOCK_MONOTONIC, &ts_start); long long start_time = (long long) ts_start.tv_sec * 1000000000LL + ts_start.tv_nsec; @@ -469,14 +468,14 @@ uint32_t remote_call(struct virtgpu * gpu, } if (max_wait_ms && timedout) { - ERROR("timed out waiting for the host answer..."); + GGML_LOG_ERROR("timed out waiting for the host answer..."); return APIR_FORWARD_TIMEOUT; } /* * Prepare the decoder */ - static struct apir_decoder response_dec; + static apir_decoder response_dec; response_dec.cur = (char *) gpu->reply_shmem.mmap_ptr + sizeof(*atomic_reply_notif); response_dec.end = (char *) gpu->reply_shmem.mmap_ptr + gpu->reply_shmem.mmap_size; *decoder = &response_dec; @@ -491,10 +490,10 @@ static void log_call_duration(long long call_duration_ns, const char * name) { double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds if (call_duration_s > 1) { - MESSAGE("%s: waited %.2fs for the %s host reply...", __func__, call_duration_s, name); + GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...", __func__, call_duration_s, name); } else if (call_duration_ms > 1) { - MESSAGE("%s: waited %.2fms for the %s host reply...", __func__, call_duration_ms, name); + GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...", __func__, call_duration_ms, name); } else { - MESSAGE("%s: waited %lldns for the %s host reply...", __func__, call_duration_ns, name); + GGML_LOG_INFO("%s: waited %lldns for the %s host reply...", __func__, call_duration_ns, name); } } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h 
b/ggml/src/ggml-remotingfrontend/virtgpu.h index c8f432eaefe..9b570261753 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu.h @@ -47,45 +47,43 @@ enum virgl_renderer_capset { #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -typedef enum virt_gpu_result_t { +enum virt_gpu_result_t { APIR_SUCCESS = 0, APIR_ERROR_INITIALIZATION_FAILED = -1, -} virt_gpu_result_t; +}; #define PRINTFLIKE(f, a) __attribute__((format(__printf__, f, a))) struct virtgpu { - struct remoting_dev_instance * instance; - bool use_apir_capset; int fd; struct { - enum virgl_renderer_capset id; + virgl_renderer_capset id; uint32_t version; - struct virgl_renderer_capset_apir data; + virgl_renderer_capset_apir data; } capset; - struct util_sparse_array shmem_array; + util_sparse_array shmem_array; /* APIR communication pages */ - struct virtgpu_shmem reply_shmem; - struct virtgpu_shmem data_shmem; + virtgpu_shmem reply_shmem; + virtgpu_shmem data_shmem; }; -static inline int virtgpu_ioctl(struct virtgpu * gpu, unsigned long request, void * args) { +static inline int virtgpu_ioctl(virtgpu * gpu, unsigned long request, void * args) { return drmIoctl(gpu->fd, request, args); } -struct virtgpu * create_virtgpu(); +virtgpu * create_virtgpu(); -struct apir_encoder * remote_call_prepare(struct virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags); +apir_encoder * remote_call_prepare(virtgpu * gpu, ApirCommandType apir_cmd_type, int32_t cmd_flags); -uint32_t remote_call(struct virtgpu * gpu, - struct apir_encoder * enc, - struct apir_decoder ** dec, - float max_wait_ms, - long long * call_duration_ns); +uint32_t remote_call(virtgpu * gpu, + apir_encoder * enc, + apir_decoder ** dec, + float max_wait_ms, + long long * call_duration_ns); -void remote_call_finish(struct virtgpu * gpu, struct apir_encoder * enc, struct apir_decoder * dec); +void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec); From 
3a9fcfb8f42cb982fb60baa6e8d761081154e644 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 12:03:27 +0100 Subject: [PATCH 10/37] Update regenerate_remoting.py to launch clang-format Assisted-by-AI: Claude Code --- .../backend-dispatched.gen.h | 76 +++++-------------- .../regenerate_remoting.py | 45 ++++++++++- .../virtgpu-forward.gen.h | 46 +++++------ 3 files changed, 86 insertions(+), 81 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h index 9336c29ad7c..b81fd5039bd 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h @@ -1,73 +1,35 @@ #pragma once /* device */ -uint32_t backend_device_get_device_count(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_device_get_count(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); -uint32_t backend_device_get_description(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); -uint32_t backend_device_get_memory(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_device_supports_op(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_device_get_buffer_type(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_device_get_props(apir_encoder * enc, - apir_decoder 
* dec, - virgl_apir_context * ctx); -uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); /* buffer-type */ -uint32_t backend_buffer_type_get_name(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_type_is_host(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, 
virgl_apir_context * ctx); /* buffer */ uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); -uint32_t backend_buffer_set_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_get_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); -uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); +uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); -uint32_t backend_buffer_free_buffer(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); /* backend */ -uint32_t backend_backend_graph_compute(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); static inline const char * backend_dispatch_command_name(ApirBackendCommandType type) { switch (type) { diff --git a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py index 82fab6cad5e..77180661231 100755 --- a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py +++ b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py @@ -21,6 +21,8 @@ from typing import Dict, List, Any, Tuple from pathlib import Path import os +import subprocess +import shutil NL = '\n' # can't have f"{'\n'}" in f-strings @@ -39,6 +41,33 @@ def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"): self.naming_patterns = 
self.config['naming_patterns'] self.config_data = self.config['config'] + # Check if clang-format is available + self.clang_format_available = self._check_clang_format_available() + + def _check_clang_format_available(self) -> bool: + """Check if clang-format is available in the system PATH.""" + return shutil.which("clang-format") is not None + + def _format_file_with_clang_format(self, file_path: Path) -> bool: + """Format a file with clang-format -i. Returns True if successful, False otherwise.""" + if not self.clang_format_available: + return False + + try: + subprocess.run( + ["clang-format", "-i", str(file_path)], + check=True, + capture_output=True, + text=True + ) + return True + except subprocess.CalledProcessError as e: + print(f" ⚠️ Warning: clang-format failed for {file_path}: {e}") + return False + except Exception as e: + print(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}") + return False + def generate_enum_name(self, group_name: str, function_name: str) -> str: """Generate the APIR_COMMAND_TYPE enum name for a function.""" prefix = self.naming_patterns['enum_prefix'] @@ -133,7 +162,7 @@ def generate_backend_dispatched_header(self) -> str: current_group = func['group_name'] signature = "uint32_t" - params = "struct apir_encoder *enc, struct apir_decoder *dec, struct virgl_apir_context *ctx" + params = "apir_encoder *enc, apir_decoder *dec, virgl_apir_context *ctx" decl_lines.append(f"{signature} {func['backend_function']}({params});") # Switch cases @@ -255,6 +284,20 @@ def regenerate_codebase(self) -> None: virtgpu_forward_path.write_text(virtgpu_forward_content) print(f" ✅ {virtgpu_forward_path.resolve()}") + # Format generated files with clang-format + generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path] + + if not self.clang_format_available: + print("\n⚠️ Warning: clang-format not found in PATH. 
Generated files will not be formatted.") + print(" Install clang-format to enable automatic code formatting.") + else: + print("\n🎨 Formatting files with clang-format...") + for file_path in generated_files: + if self._format_file_with_clang_format(file_path): + print(f" ✅ Formatted {file_path.name}") + else: + print(f" ❌ Failed to format {file_path.name}") + # Generate summary functions = self.get_enabled_functions() total_functions = len(functions) diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h index f02d586a342..c27c07f0865 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h @@ -1,51 +1,51 @@ #pragma once /* device */ -void apir_device_get_device_count(virtgpu * gpu); -int apir_device_get_count(virtgpu * gpu); -const char * apir_device_get_name(virtgpu * gpu); -const char * apir_device_get_description(virtgpu * gpu); -uint32_t apir_device_get_type(virtgpu * gpu); -void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total); -bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op); -apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu); -void apir_device_get_props(virtgpu * gpu, +void apir_device_get_device_count(struct virtgpu * gpu); +int apir_device_get_count(struct virtgpu * gpu); +const char * apir_device_get_name(struct virtgpu * gpu); +const char * apir_device_get_description(struct virtgpu * gpu); +uint32_t apir_device_get_type(struct virtgpu * gpu); +void apir_device_get_memory(struct virtgpu * gpu, size_t * free, size_t * total); +bool apir_device_supports_op(struct virtgpu * gpu, const ggml_tensor * op); +apir_buffer_type_host_handle_t apir_device_get_buffer_type(struct virtgpu * gpu); +void apir_device_get_props(struct virtgpu * gpu, bool * async, bool * host_buffer, bool * buffer_from_host_ptr, bool * events); -apir_buffer_context_t 
apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size); +apir_buffer_context_t apir_device_buffer_from_ptr(struct virtgpu * gpu, size_t size, size_t max_tensor_size); /* buffer-type */ -const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft); -size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft); -bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft); -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, +const char * apir_buffer_type_get_name(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_alignment(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +size_t apir_buffer_type_get_max_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +bool apir_buffer_type_is_host(struct virtgpu * gpu, ggml_backend_buffer_type_t buft); +apir_buffer_context_t apir_buffer_type_alloc_buffer(struct virtgpu * gpu, ggml_backend_buffer_type_t buffer_buft, size_t size); -size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); +size_t apir_buffer_type_get_alloc_size(struct virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op); /* buffer */ -void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context); -void apir_buffer_set_tensor(virtgpu * gpu, +void * apir_buffer_get_base(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); +void apir_buffer_set_tensor(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, ggml_tensor * tensor, const void * data, size_t offset, size_t size); -void apir_buffer_get_tensor(virtgpu * gpu, +void apir_buffer_get_tensor(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * tensor, void * data, size_t offset, size_t size); -bool apir_buffer_cpy_tensor(virtgpu * gpu, 
+bool apir_buffer_cpy_tensor(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * src, const ggml_tensor * dst); -void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value); -void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context); +void apir_buffer_clear(struct virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value); +void apir_buffer_free_buffer(struct virtgpu * gpu, apir_buffer_context_t * buffer_context); /* backend */ -ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph); +ggml_status apir_backend_graph_compute(struct virtgpu * gpu, ggml_cgraph * cgraph); From 74f0a8596e07345603133160e7ff1d71424afb73 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 15:00:47 +0100 Subject: [PATCH 11/37] reformat with clang-format --- .../apir_cs_ggml-rpc-back.cpp | 4 +- .../backend-dispatched-backend.cpp | 4 +- .../backend-dispatched-buffer-type.cpp | 24 +++-------- .../backend-dispatched-buffer.cpp | 20 +++------- .../backend-dispatched-device.cpp | 40 +++++-------------- .../ggml-remotingbackend/backend-dispatched.h | 5 +-- ggml/src/ggml-remotingbackend/backend.cpp | 4 +- .../ggml-backend-buffer-type.cpp | 3 +- .../ggml-backend-buffer.cpp | 3 +- .../ggml-backend-device.cpp | 3 +- .../ggml-backend-reg.cpp | 2 +- .../src/ggml-remotingfrontend/ggml-remoting.h | 16 ++++---- .../virtgpu-forward-backend.cpp | 6 +-- .../virtgpu-forward-buffer-type.cpp | 28 ++++++------- .../virtgpu-forward-buffer.cpp | 26 ++++++------ .../virtgpu-forward-device.cpp | 36 ++++++++--------- .../virtgpu-forward-impl.h | 32 +++++++-------- .../src/ggml-remotingfrontend/virtgpu-utils.h | 6 +-- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 14 +++---- ggml/src/ggml-remotingfrontend/virtgpu.h | 4 +- 20 files changed, 113 insertions(+), 167 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp 
b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp index ddc0b7cd445..60a8a93bfb8 100644 --- a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp +++ b/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp @@ -74,7 +74,7 @@ ggml_tensor * apir_create_node(uint64_t return tensor_map[id]; } const apir_rpc_tensor * tensor = tensor_ptrs.at(id); - ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); + ggml_tensor * result = apir_deserialize_tensor(ctx, tensor); if (result == nullptr) { return nullptr; } @@ -99,7 +99,7 @@ ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes, }; ggml_context * ctx = ggml_init(params); ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false); - graph->n_nodes = n_nodes; + graph->n_nodes = n_nodes; std::unordered_map tensor_ptrs; for (uint32_t i = 0; i < n_tensors; i++) { tensor_ptrs[tensors[i].id] = &tensors[i]; diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index 229ffa7f9c7..d010aa6f0f0 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -7,9 +7,7 @@ #include -uint32_t backend_backend_graph_compute(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp index ad908d243a3..4a54ee8c216 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp @@ -6,9 +6,7 @@ #include -uint32_t backend_buffer_type_get_name(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, 
virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -22,9 +20,7 @@ uint32_t backend_buffer_type_get_name(apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -35,9 +31,7 @@ uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -48,9 +42,7 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_is_host(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -61,9 +53,7 @@ uint32_t backend_buffer_type_is_host(apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; @@ -85,9 +75,7 @@ uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, return 0; } -uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * 
ctx) { UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index 03c2299ed60..1c20e6b6187 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -6,9 +6,7 @@ #include -uint32_t backend_buffer_get_base(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -19,9 +17,7 @@ uint32_t backend_buffer_get_base(apir_encoder * enc, return 0; } -uint32_t backend_buffer_set_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); @@ -53,9 +49,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, return 0; } -uint32_t backend_buffer_get_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); @@ -86,9 +80,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, return 0; } -uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); ggml_backend_buffer_t buffer; @@ -121,9 +113,7 @@ uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir return 0; } -uint32_t backend_buffer_free_buffer(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, 
virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(enc); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index 26f0cb88f40..da62d2300ad 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -6,9 +6,7 @@ #include -uint32_t backend_device_get_device_count(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -19,9 +17,7 @@ uint32_t backend_device_get_device_count(apir_encoder * enc, return 0; } -uint32_t backend_device_get_count(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(ctx); UNUSED(dec); @@ -32,9 +28,7 @@ uint32_t backend_device_get_count(apir_encoder * enc, return 0; } -uint32_t backend_device_get_name(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -47,9 +41,7 @@ uint32_t backend_device_get_name(apir_encoder * enc, return 0; } -uint32_t backend_device_get_description(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -62,9 +54,7 @@ uint32_t backend_device_get_description(apir_encoder * enc, return 0; } -uint32_t backend_device_get_type(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -74,9 +64,7 @@ uint32_t 
backend_device_get_type(apir_encoder * enc, return 0; } -uint32_t backend_device_get_memory(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -89,9 +77,7 @@ uint32_t backend_device_get_memory(apir_encoder * enc, return 0; } -uint32_t backend_device_supports_op(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); @@ -103,9 +89,7 @@ uint32_t backend_device_supports_op(apir_encoder * enc, return 0; } -uint32_t backend_device_get_buffer_type(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -116,9 +100,7 @@ uint32_t backend_device_get_buffer_type(apir_encoder * enc, return 0; } -uint32_t backend_device_get_props(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); @@ -133,9 +115,7 @@ uint32_t backend_device_get_props(apir_encoder * enc, return 0; } -uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx) { +uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { UNUSED(ctx); UNUSED(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index 2268ed8966a..8b8e86e3832 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -11,10 +11,7 @@ #include "shared/apir_cs.h" #include 
"shared/apir_cs_ggml.h" - -typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, - apir_decoder * dec, - virgl_apir_context * ctx); +typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); #include "backend-dispatched.gen.h" diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index e8db1d07ad3..b689687a4ba 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -112,8 +112,8 @@ uint32_t apir_backend_dispatcher(uint32_t cmd_type, apir_encoder * enc = &_enc; apir_decoder _dec = { - .cur = dec_cur, - .end = dec_end, + .cur = dec_cur, + .end = dec_end, .fatal = false, }; apir_decoder * dec = &_dec; diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp index 73df9ea9165..ef6850570a0 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp @@ -4,8 +4,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_buffer_type_alloc_buffer(ggml size_t size) { virtgpu * gpu = BUFT_TO_GPU(buft); - ggml_backend_remoting_buffer_context * context = - (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { GGML_ABORT("Couldn't allocate the buffer context ..."); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp index c7fdb241de8..6b95362dd80 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp @@ -3,8 +3,7 @@ #define BUFFER_TO_GPU(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->gpu static void * ggml_backend_remoting_buffer_get_base(ggml_backend_buffer_t buffer) { - 
ggml_backend_remoting_buffer_context * context = - (ggml_backend_remoting_buffer_context *) buffer->context; + ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) buffer->context; if (context->base) { return context->base; } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp index b93d7d31664..579eb990781 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp @@ -108,8 +108,7 @@ static ggml_backend_buffer_t ggml_backend_remoting_device_buffer_from_ptr(ggml_b size_t max_tensor_size) { virtgpu * gpu = DEV_TO_GPU(dev); - ggml_backend_remoting_buffer_context * context = - (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); + ggml_backend_remoting_buffer_context * context = (ggml_backend_remoting_buffer_context *) malloc(sizeof(*context)); if (!context) { GGML_ABORT("Couldn't allocate the buffer context ..."); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index 75718bc1b14..1e246506741 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -5,7 +5,7 @@ static virtgpu * apir_initialize() { static virtgpu * apir_gpu_instance = NULL; - static bool apir_initialized = false; + static bool apir_initialized = false; if (apir_initialized) { return apir_gpu_instance; diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-remotingfrontend/ggml-remoting.h index 3027fafdfba..66cad84e5fe 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting.h +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting.h @@ -20,11 +20,9 @@ #define BUFFER_TO_APIR_CONTEXT(name) &((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context -#define BUFFER_TO_HOST_HANDLE(name) \ - ((ggml_backend_remoting_buffer_context *) 
(name)->context)->apir_context.host_handle +#define BUFFER_TO_HOST_HANDLE(name) ((ggml_backend_remoting_buffer_context *) (name)->context)->apir_context.host_handle -#define GET_DEVICE_CONTEXT() \ - (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context +#define GET_DEVICE_CONTEXT() (ggml_backend_remoting_device_context *) ggml_backend_remoting_get_device(0)->context #define BUFT_TO_GPU(name) ((ggml_backend_remoting_device_context *) (name)->device->context)->gpu @@ -48,11 +46,11 @@ struct ggml_backend_remoting_buffer_context { bool is_from_ptr; }; -extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface; -extern const ggml_backend_device_i ggml_backend_remoting_device_interface; -extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface; -extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface; -extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface; +extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_type_interface; +extern const ggml_backend_device_i ggml_backend_remoting_device_interface; +extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_interface; +extern const ggml_backend_buffer_type_i ggml_backend_remoting_buffer_from_ptr_type_interface; +extern const ggml_backend_buffer_i ggml_backend_remoting_buffer_from_ptr_interface; ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device); ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const char * params); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp index 39038221a2e..df7070edfdb 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp @@ -7,8 +7,8 @@ static long long current_time_ms() { } ggml_status apir_backend_graph_compute(virtgpu * gpu, 
ggml_cgraph * cgraph) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BACKEND_GRAPH_COMPUTE); @@ -30,7 +30,7 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { apir_encode_size_t(encoder, &cgraph_size); - char * shmem_data = (char *) shmem->mmap_ptr; + char * shmem_data = (char *) shmem->mmap_ptr; apir_encoder secondary_enc = apir_new_encoder(shmem_data, cgraph_size); apir_encode_cgraph_data(&secondary_enc, cgraph_data); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp index a5c4302ce4b..34ad26fcd2d 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -1,8 +1,8 @@ #include "virtgpu-forward-impl.h" const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_NAME); @@ -25,8 +25,8 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t } size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALIGNMENT); @@ -44,8 +44,8 @@ size_t apir_buffer_type_get_alignment(virtgpu * gpu, ggml_backend_buffer_type_t } size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, 
encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_MAX_SIZE); @@ -63,8 +63,8 @@ size_t apir_buffer_type_get_max_size(virtgpu * gpu, ggml_backend_buffer_type_t b } bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_IS_HOST); @@ -81,11 +81,9 @@ bool apir_buffer_type_is_host(virtgpu * gpu, ggml_backend_buffer_type_t buft) { return is_host; } -apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, - ggml_backend_buffer_type_t buft, - size_t size) { - apir_encoder * encoder; - apir_decoder * decoder; +apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, ggml_backend_buffer_type_t buft, size_t size) { + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; apir_buffer_context_t buffer_context; @@ -106,8 +104,8 @@ apir_buffer_context_t apir_buffer_type_alloc_buffer(virtgpu * gpu, } size_t apir_buffer_type_get_alloc_size(virtgpu * gpu, ggml_backend_buffer_type_t buft, const ggml_tensor * op) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_TYPE_GET_ALLOC_SIZE); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp index 472da7f61c3..716459c8127 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp @@ -1,8 +1,8 @@ #include "virtgpu-forward-impl.h" void * apir_buffer_get_base(virtgpu * gpu, apir_buffer_context_t * buffer_context) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, 
APIR_COMMAND_TYPE_BUFFER_GET_BASE); @@ -25,8 +25,8 @@ void apir_buffer_set_tensor(virtgpu * gpu, const void * data, size_t offset, size_t size) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_SET_TENSOR); @@ -73,14 +73,14 @@ apir_buffer_get_tensor(virtgpu *gpu, apir_buffer_context_t *buffer_context, memcpy(data, buffer_base_addr+offset, size); } #else -void apir_buffer_get_tensor(virtgpu * gpu, +void apir_buffer_get_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_GET_TENSOR); @@ -119,8 +119,8 @@ bool apir_buffer_cpy_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * src, const ggml_tensor * dst) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CPY_TENSOR); @@ -140,8 +140,8 @@ bool apir_buffer_cpy_tensor(virtgpu * gpu, } void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, uint8_t value) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_BUFFER_CLEAR); @@ -155,8 +155,8 @@ void apir_buffer_clear(virtgpu * gpu, apir_buffer_context_t * buffer_context, ui } void apir_buffer_free_buffer(virtgpu * gpu, apir_buffer_context_t * buffer_context) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, 
APIR_COMMAND_TYPE_BUFFER_FREE_BUFFER); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp index c88a638227e..88b744572ee 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -7,8 +7,8 @@ int apir_device_get_count(virtgpu * gpu) { return dev_count; } - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_COUNT); @@ -26,8 +26,8 @@ const char * apir_device_get_name(virtgpu * gpu) { if (string) { return string; } - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_NAME); @@ -47,8 +47,8 @@ const char * apir_device_get_name(virtgpu * gpu) { } const char * apir_device_get_description(virtgpu * gpu) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_DESCRIPTION); @@ -75,8 +75,8 @@ uint32_t apir_device_get_type(virtgpu * gpu) { return dev_type; } - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_TYPE); @@ -93,8 +93,8 @@ uint32_t apir_device_get_type(virtgpu * gpu) { void apir_device_get_memory(virtgpu * gpu, size_t * free, size_t * total) { static size_t dev_free = 0; static size_t dev_total = 0; - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_MEMORY); @@ -113,8 +113,8 @@ void apir_device_get_memory(virtgpu * 
gpu, size_t * free, size_t * total) { } bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_SUPPORTS_OP); @@ -132,8 +132,8 @@ bool apir_device_supports_op(virtgpu * gpu, const ggml_tensor * op) { } apir_buffer_type_host_handle_t apir_device_get_buffer_type(virtgpu * gpu) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_BUFFER_TYPE); @@ -153,8 +153,8 @@ void apir_device_get_props(virtgpu * gpu, bool * host_buffer, bool * buffer_from_host_ptr, bool * events) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_GET_PROPS); @@ -172,8 +172,8 @@ void apir_device_get_props(virtgpu * gpu, } apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, size_t max_tensor_size) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirForwardReturnCode ret; apir_buffer_context_t buffer_context; diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h index b1e00841966..a86e4812657 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h @@ -6,24 +6,24 @@ #include "ggml-backend-impl.h" -#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ - do { \ - int32_t forward_flag = (int32_t) apir_command_type__; \ +#define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ + do { \ + int32_t forward_flag = (int32_t) apir_command_type__; \ encoder_name = 
remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ - if (!encoder_name) { \ - GGML_ABORT("%s: failed to prepare the remote call encoder :/", __func__); \ - } \ + if (!encoder_name) { \ + GGML_ABORT("%s: failed to prepare the remote call encoder :/", __func__); \ + } \ } while (0) -#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ - do { \ - ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ - if (!decoder_name) { \ - GGML_ABORT("%s: failed to kick the remote call :/", __func__); \ - } \ - if (ret_name < APIR_FORWARD_BASE_INDEX) { \ +#define REMOTE_CALL(gpu_dev_name, encoder_name, decoder_name, ret_name) \ + do { \ + ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ + if (!decoder_name) { \ + GGML_ABORT("%s: failed to kick the remote call :/", __func__); \ + } \ + if (ret_name < APIR_FORWARD_BASE_INDEX) { \ GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), \ - ret_name); \ - } \ - ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ + ret_name); \ + } \ + ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ } while (0) diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h index e6e8ff73d8e..a0036b4e2bc 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-utils.h @@ -61,9 +61,9 @@ inline void os_time_sleep(int64_t usecs) { } struct timer_data { - long long start; - long long total; - long long count; + long long start; + long long total; + long long count; }; static inline void start_timer(timer_data * timer) { diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index 1149fa72f4a..763fbaa7ea7 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ 
b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -91,8 +91,8 @@ static int virtgpu_handshake(virtgpu * gpu) { } static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { - apir_encoder * encoder; - apir_decoder * decoder; + apir_encoder * encoder; + apir_decoder * decoder; ApirLoadLibraryReturnCode ret; encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); @@ -124,7 +124,7 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { if (ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { GGML_ABORT("%s: virglrenderer could not load the API Remoting backend library: %s (code %d)", __func__, - apir_load_library_error(ret), ret); + apir_load_library_error(ret), ret); return ret; } @@ -133,12 +133,12 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", __func__, - apir_ret, apir_load_library_error(apir_ret)); + GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", + __func__, apir_ret, apir_load_library_error(apir_ret)); } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", __func__, - lib_ret); + lib_ret); } return ret; } @@ -319,7 +319,7 @@ static int virtgpu_ioctl_get_caps(virtgpu * gpu, static uint64_t virtgpu_ioctl_getparam(virtgpu * gpu, uint64_t param) { /* val must be zeroed because kernel only writes the lower 32 bits */ - uint64_t val = 0; + uint64_t val = 0; drm_virtgpu_getparam args = { .param = param, .value = (uintptr_t) &val, diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h b/ggml/src/ggml-remotingfrontend/virtgpu.h index 9b570261753..00f0a820b14 100644 --- 
a/ggml/src/ggml-remotingfrontend/virtgpu.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu.h @@ -60,8 +60,8 @@ struct virtgpu { int fd; struct { - virgl_renderer_capset id; - uint32_t version; + virgl_renderer_capset id; + uint32_t version; virgl_renderer_capset_apir data; } capset; From 1a450490d23f42824dddc8fe1be94c1c307512d8 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 12 Jan 2026 15:49:55 +0100 Subject: [PATCH 12/37] more cleanups --- .../backend-dispatched-backend.cpp | 10 +-- .../backend-dispatched-buffer-type.cpp | 15 +++-- .../backend-dispatched-buffer.cpp | 30 ++++----- .../backend-dispatched-device.cpp | 46 +++++++------- .../backend-dispatched.cpp | 9 ++- .../ggml-remotingbackend/backend-dispatched.h | 1 - ggml/src/ggml-remotingbackend/backend-utils.h | 51 --------------- ...ackend-internal.h => backend-virgl-apir.h} | 0 ggml/src/ggml-remotingbackend/backend.cpp | 63 ++++++++++++------- .../shared/api_remoting.h | 6 +- .../shared/apir_cs_ggml.h | 2 +- 11 files changed, 100 insertions(+), 133 deletions(-) delete mode 100644 ggml/src/ggml-remotingbackend/backend-utils.h rename ggml/src/ggml-remotingbackend/{backend-internal.h => backend-virgl-apir.h} (100%) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index d010aa6f0f0..f61b113b2e5 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -1,5 +1,5 @@ #include "backend-dispatched.h" -#include "backend-internal.h" +#include "backend-virgl-apir.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -8,8 +8,8 @@ #include uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(enc); + GGML_UNUSED(ctx); + GGML_UNUSED(enc); static bool async_backend_initialized = false; static bool async_backend; @@ -27,7 +27,7 @@ uint32_t 
backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v const void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - ERROR("Couldn't get the shmem addr from virgl"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl"); apir_decoder_set_fatal(dec); return 1; } @@ -45,7 +45,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v if (dev->iface.supports_op(dev, op)) { continue; } - ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); + GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); status = GGML_STATUS_ABORTED; apir_encode_ggml_status(enc, &status); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp index 4a54ee8c216..8ea1bb4fb49 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp @@ -1,5 +1,5 @@ #include "backend-dispatched.h" -#include "backend-internal.h" +#include "backend-virgl-apir.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -7,7 +7,7 @@ #include uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -21,7 +21,7 @@ uint32_t backend_buffer_type_get_name(apir_encoder * enc, apir_decoder * dec, vi } uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -32,7 +32,7 @@ uint32_t backend_buffer_type_get_alignment(apir_encoder * enc, apir_decoder * de } uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec, 
virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -43,7 +43,7 @@ uint32_t backend_buffer_type_get_max_size(apir_encoder * enc, apir_decoder * dec } uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -54,8 +54,7 @@ uint32_t backend_buffer_type_is_host(apir_encoder * enc, apir_decoder * dec, vir } uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); @@ -76,7 +75,7 @@ uint32_t backend_buffer_type_alloc_buffer(apir_encoder * enc, apir_decoder * dec } uint32_t backend_buffer_type_get_alloc_size(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_type_t buft; buft = apir_decode_ggml_buffer_type(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index 1c20e6b6187..de68e583ddf 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -1,5 +1,5 @@ #include "backend-dispatched.h" -#include "backend-internal.h" +#include "backend-virgl-apir.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -7,7 +7,7 @@ #include uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -18,8 +18,8 @@ uint32_t backend_buffer_get_base(apir_encoder * enc, apir_decoder * dec, virgl_a } uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * 
dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(enc); + GGML_UNUSED(ctx); + GGML_UNUSED(enc); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -40,7 +40,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - ERROR("Couldn't get the shmem addr from virgl :/"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl :/"); return 1; } @@ -50,8 +50,8 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl } uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(enc); + GGML_UNUSED(ctx); + GGML_UNUSED(enc); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -71,7 +71,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - ERROR("Couldn't get the shmem addr from virgl :/"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl :/"); return 1; } @@ -81,11 +81,11 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl } uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); - INFO("%s <---->", __func__); + GGML_LOG_INFO("%s <---->", __func__); const ggml_tensor * src; // safe to remove the const qualifier here src = apir_decode_ggml_tensor(dec); @@ -99,8 +99,8 @@ uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl } uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(enc); + GGML_UNUSED(ctx); + GGML_UNUSED(enc); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); @@ -114,14 
+114,14 @@ uint32_t backend_buffer_clear(apir_encoder * enc, apir_decoder * dec, virgl_apir } uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(enc); + GGML_UNUSED(ctx); + GGML_UNUSED(enc); ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); if (!apir_untrack_backend_buffer(buffer)) { - WARNING("%s: unknown buffer %p", (void *) buffer); + GGML_LOG_WARN("%s: unknown buffer %p", __func__, (void *) buffer); return 1; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index da62d2300ad..c314c2a30bf 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -1,5 +1,5 @@ #include "backend-dispatched.h" -#include "backend-internal.h" +#include "backend-virgl-apir.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -7,9 +7,9 @@ #include uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); int32_t dev_count = reg->iface.get_device_count(reg); apir_encode_int32_t(enc, &dev_count); @@ -18,9 +18,9 @@ uint32_t backend_device_get_device_count(apir_encoder * enc, apir_decoder * dec, } uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); int32_t dev_count = reg->iface.get_device_count(reg); apir_encode_int32_t(enc, &dev_count); @@ -29,8 +29,8 @@ uint32_t backend_device_get_count(apir_encoder * enc, apir_decoder * dec, virgl_ } uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + 
GGML_UNUSED(dec); const char * string = dev->iface.get_name(dev); @@ -42,8 +42,8 @@ uint32_t backend_device_get_name(apir_encoder * enc, apir_decoder * dec, virgl_a } uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); const char * string = dev->iface.get_description(dev); @@ -55,8 +55,8 @@ uint32_t backend_device_get_description(apir_encoder * enc, apir_decoder * dec, } uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); uint32_t type = dev->iface.get_type(dev); apir_encode_uint32_t(enc, &type); @@ -65,8 +65,8 @@ uint32_t backend_device_get_type(apir_encoder * enc, apir_decoder * dec, virgl_a } uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); size_t free, total; dev->iface.get_memory(dev, &free, &total); @@ -78,7 +78,7 @@ uint32_t backend_device_get_memory(apir_encoder * enc, apir_decoder * dec, virgl } uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); + GGML_UNUSED(ctx); const ggml_tensor * op = apir_decode_ggml_tensor_inplace(dec); @@ -90,8 +90,8 @@ uint32_t backend_device_supports_op(apir_encoder * enc, apir_decoder * dec, virg } uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); ggml_backend_buffer_type_t bufft = dev->iface.get_buffer_type(dev); @@ -101,8 +101,8 @@ uint32_t backend_device_get_buffer_type(apir_encoder * enc, apir_decoder * dec, } uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + 
GGML_UNUSED(dec); ggml_backend_dev_props props; dev->iface.get_props(dev, &props); @@ -116,15 +116,15 @@ uint32_t backend_device_get_props(apir_encoder * enc, apir_decoder * dec, virgl_ } uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx) { - UNUSED(ctx); - UNUSED(dec); + GGML_UNUSED(ctx); + GGML_UNUSED(dec); uint32_t shmem_res_id; apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); void * shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_ptr) { - ERROR("Couldn't get the shmem addr from virgl"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl"); apir_decoder_set_fatal(dec); return 1; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index 7800f7e814b..792443c9a95 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -1,6 +1,6 @@ #include "backend-dispatched.h" +#include "backend-virgl-apir.h" -#include "backend-internal.h" #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -17,14 +17,14 @@ long long timer_count = 0; uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { if (reg != NULL) { - WARNING("%s: already initialized :/", __func__); + GGML_LOG_WARN("%s: already initialized :/", __func__); return APIR_BACKEND_INITIALIZE_ALREADY_INITED; } ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; reg = ggml_backend_reg_fct(); if (reg == NULL) { - ERROR("%s: backend registration failed :/", __func__); + GGML_LOG_ERROR("%s: backend registration failed :/", __func__); return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } @@ -36,13 +36,12 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_ bck = ggml_backend_fct(0); if (!bck) { - ERROR("%s: backend initialization failed :/", 
__func__); + GGML_LOG_ERROR("%s: backend initialization failed :/", __func__); return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; } size_t free, total; dev->iface.get_memory(dev, &free, &total); - INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); return APIR_BACKEND_INITIALIZE_SUCCESS; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index 8b8e86e3832..bc2330337f0 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -5,7 +5,6 @@ #include -#include "backend-utils.h" #include "backend-convert.h" #include "shared/apir_backend.h" #include "shared/apir_cs.h" diff --git a/ggml/src/ggml-remotingbackend/backend-utils.h b/ggml/src/ggml-remotingbackend/backend-utils.h deleted file mode 100644 index bb1a5a57bc1..00000000000 --- a/ggml/src/ggml-remotingbackend/backend-utils.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include - -#include -#include -#include -#include - -#define UNUSED GGML_UNUSED -#define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE" - -static FILE * get_log_dest(void) { - static FILE * dest = NULL; - if (dest) { - return dest; - } - const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); - if (!apir_log_to_file) { - dest = stderr; - return dest; - } - - dest = fopen(apir_log_to_file, "w"); - - return dest; -} - -#define APIR_VA_PRINT(prefix, format) \ - do { \ - FILE * dest = get_log_dest(); \ - fprintf(dest, prefix); \ - va_list argptr; \ - va_start(argptr, format); \ - vfprintf(dest, format, argptr); \ - fprintf(dest, "\n"); \ - va_end(argptr); \ - fflush(dest); \ - } while (0) - -inline void INFO(const char * format, ...) { - APIR_VA_PRINT("INFO: ", format); -} - -inline void WARNING(const char * format, ...) { - APIR_VA_PRINT("WARNING: ", format); -} - -inline void ERROR(const char * format, ...) 
{ - APIR_VA_PRINT("ERROR: ", format); -} diff --git a/ggml/src/ggml-remotingbackend/backend-internal.h b/ggml/src/ggml-remotingbackend/backend-virgl-apir.h similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-internal.h rename to ggml/src/ggml-remotingbackend/backend-virgl-apir.h diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index b689687a4ba..e4069101cdd 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -1,6 +1,6 @@ #include "backend-dispatched.h" -#include "backend-internal.h" -#include "backend-utils.h" +#include "backend-virgl-apir.h" + #include "shared/api_remoting.h" #include "shared/apir_backend.h" #include "shared/apir_cs.h" @@ -10,11 +10,19 @@ #include -#define GGML_BACKEND_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH" -#define GGML_BACKEND_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" -#define GGML_BACKEND_LIBRARY_INIT_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_INIT" +#define APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH" +#define APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" +#define APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_INIT" +#define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE" static void * backend_library_handle = NULL; +static FILE * apir_logfile = NULL; + +static void log_to_file_callback(enum ggml_log_level level, const char * text, void * user_data) { + FILE * logfile = (FILE *)user_data; + fprintf(logfile, "[%d] %s", level, text); + fflush(logfile); +} extern "C" { void apir_backend_deinit(void) { @@ -27,28 +35,41 @@ void apir_backend_deinit(void) { if (dev) { size_t free, total; dev->iface.get_memory(dev, &free, &total); - INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); + GGML_LOG_INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); } if (backend_library_handle) { - INFO("%s: The GGML 
backend library was loaded. Unloading it.", __func__); + GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.", __func__); dlclose(backend_library_handle); + backend_library_handle = NULL; } - INFO("%s: bye-bye", __func__); + if (apir_logfile) { + fclose(apir_logfile); + apir_logfile = NULL; + } } ApirLoadLibraryReturnCode apir_backend_initialize() { const char * dlsym_error; - const char * library_name = getenv(GGML_BACKEND_LIBRARY_PATH_ENV); - const char * library_reg = getenv(GGML_BACKEND_LIBRARY_REG_ENV); - const char * library_init = getenv(GGML_BACKEND_LIBRARY_INIT_ENV); + const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); + if (apir_log_to_file) { + apir_logfile = fopen(apir_log_to_file, "w"); + if (apir_logfile) { + ggml_log_set(log_to_file_callback, apir_logfile); + } else { + GGML_LOG_INFO("Could not open the log file at '%s'", apir_log_to_file); + } + } + const char * library_name = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); + const char * library_reg = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); + const char * library_init = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV); - INFO("%s: loading %s (%s|%s)", __func__, library_name, library_reg, library_init); + GGML_LOG_INFO("%s: loading %s (%s|%s)", __func__, library_name, library_reg, library_init); if (!library_name) { - ERROR("cannot open the GGML library: env var '%s' not defined\n", GGML_BACKEND_LIBRARY_PATH_ENV); + GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -56,13 +77,13 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { backend_library_handle = dlopen(library_name, RTLD_LAZY); if (!backend_library_handle) { - ERROR("cannot open the GGML library: %s", dlerror()); + GGML_LOG_ERROR("cannot open the GGML library: %s", dlerror()); return APIR_LOAD_LIBRARY_CANNOT_OPEN; } if (!library_reg) { - ERROR("cannot register the GGML library: env var 
'%s' not defined", GGML_BACKEND_LIBRARY_REG_ENV); + GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -70,14 +91,14 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); dlsym_error = dlerror(); if (dlsym_error) { - ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", library_reg, - GGML_BACKEND_LIBRARY_REG_ENV, dlsym_error); + GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", library_reg, + APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } if (!library_init) { - ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init); + GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -85,8 +106,8 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { void * ggml_backend_init_fct = dlsym(backend_library_handle, library_init); dlsym_error = dlerror(); if (dlsym_error) { - ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", library_init, - GGML_BACKEND_LIBRARY_INIT_ENV, dlsym_error); + GGML_LOG_ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", library_init, + APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV, dlsym_error); return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } @@ -119,7 +140,7 @@ uint32_t apir_backend_dispatcher(uint32_t cmd_type, apir_decoder * dec = &_dec; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { - ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); + GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); return APIR_BACKEND_FORWARD_INDEX_INVALID; } diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h 
b/ggml/src/ggml-remotingbackend/shared/api_remoting.h index 4025586cc3a..4c9109b17f9 100644 --- a/ggml/src/ggml-remotingbackend/shared/api_remoting.h +++ b/ggml/src/ggml-remotingbackend/shared/api_remoting.h @@ -16,7 +16,7 @@ enum ApirCommandType { APIR_COMMAND_TYPE_LoadLibrary = 1, APIR_COMMAND_TYPE_Forward = 2, - APIR_COMMAND_TYPE_LENGTH = 3, + APIR_COMMAND_TYPE_LENGTH = 3, }; typedef uint64_t ApirCommandFlags; @@ -28,7 +28,7 @@ enum ApirLoadLibraryReturnCode { APIR_LOAD_LIBRARY_ENV_VAR_MISSING = 3, APIR_LOAD_LIBRARY_CANNOT_OPEN = 4, APIR_LOAD_LIBRARY_SYMBOL_MISSING = 5, - APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code + APIR_LOAD_LIBRARY_INIT_BASE_INDEX = 6, // anything above this is a APIR backend library initialization return code }; enum ApirForwardReturnCode { @@ -36,7 +36,7 @@ enum ApirForwardReturnCode { APIR_FORWARD_NO_DISPATCH_FCT = 1, APIR_FORWARD_TIMEOUT = 2, - APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code + APIR_FORWARD_BASE_INDEX = 3, // anything above this is a APIR backend library forward return code } ; __attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) { diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h index 2e1b26a01ba..40898807446 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -133,7 +133,7 @@ static inline void apir_encode_cgraph_data(apir_encoder * enc, std::vector Date: Mon, 12 Jan 2026 15:52:12 +0100 Subject: [PATCH 13/37] Remove extra header files --- .../include/drm-uapi/drm.h | 1408 ----------------- .../include/drm-uapi/virtgpu_drm.h | 276 ---- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 4 +- ggml/src/ggml-remotingfrontend/virtgpu.h | 27 +- 4 files changed, 17 insertions(+), 1698 deletions(-) delete mode 100644 
ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h delete mode 100644 ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h diff --git a/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h b/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h deleted file mode 100644 index 4e4f7c2c39e..00000000000 --- a/ggml/src/ggml-remotingfrontend/include/drm-uapi/drm.h +++ /dev/null @@ -1,1408 +0,0 @@ -/* - * Header for the Direct Rendering Manager - * - * Author: Rickard E. (Rik) Faith - * - * Acknowledgments: - * Dec 1999, Richard Henderson , move to generic cmpxchg. - */ - -/* - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef _DRM_H_ -#define _DRM_H_ - -#if defined(__linux__) - -#include -#include -typedef unsigned int drm_handle_t; - -#else /* One of the BSDs */ - -#include -#include -#include -typedef int8_t __s8; -typedef uint8_t __u8; -typedef int16_t __s16; -typedef uint16_t __u16; -typedef int32_t __s32; -typedef uint32_t __u32; -typedef int64_t __s64; -typedef uint64_t __u64; -typedef size_t __kernel_size_t; -typedef unsigned long drm_handle_t; - -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ -#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ -#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ -#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ - -#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ -#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ -#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) -#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) -#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) - -typedef unsigned int drm_context_t; -typedef unsigned int drm_drawable_t; -typedef unsigned int drm_magic_t; - -/* - * Cliprect. - * - * \warning: If you change this structure, make sure you change - * XF86DRIClipRectRec in the server as well - * - * \note KW: Actually it's illegal to change either for - * backwards-compatibility reasons. - */ -struct drm_clip_rect { - unsigned short x1; - unsigned short y1; - unsigned short x2; - unsigned short y2; -}; - -/* - * Drawable information. - */ -struct drm_drawable_info { - unsigned int num_rects; - struct drm_clip_rect *rects; -}; - -/* - * Texture region, - */ -struct drm_tex_region { - unsigned char next; - unsigned char prev; - unsigned char in_use; - unsigned char padding; - unsigned int age; -}; - -/* - * Hardware lock. - * - * The lock structure is a simple cache-line aligned integer. 
To avoid - * processor bus contention on a multiprocessor system, there should not be any - * other data stored in the same cache line. - */ -struct drm_hw_lock { - __volatile__ unsigned int lock; /**< lock variable */ - char padding[60]; /**< Pad to cache line */ -}; - -/* - * DRM_IOCTL_VERSION ioctl argument type. - * - * \sa drmGetVersion(). - */ -struct drm_version { - int version_major; /**< Major version */ - int version_minor; /**< Minor version */ - int version_patchlevel; /**< Patch level */ - __kernel_size_t name_len; /**< Length of name buffer */ - char *name; /**< Name of driver */ - __kernel_size_t date_len; /**< Length of date buffer */ - char *date; /**< User-space buffer to hold date */ - __kernel_size_t desc_len; /**< Length of desc buffer */ - char *desc; /**< User-space buffer to hold desc */ -}; - -/* - * DRM_IOCTL_GET_UNIQUE ioctl argument type. - * - * \sa drmGetBusid() and drmSetBusId(). - */ -struct drm_unique { - __kernel_size_t unique_len; /**< Length of unique */ - char *unique; /**< Unique name for driver instantiation */ -}; - -struct drm_list { - int count; /**< Length of user-space structures */ - struct drm_version *version; -}; - -struct drm_block { - int unused; -}; - -/* - * DRM_IOCTL_CONTROL ioctl argument type. - * - * \sa drmCtlInstHandler() and drmCtlUninstHandler(). - */ -struct drm_control { - enum { - DRM_ADD_COMMAND, - DRM_RM_COMMAND, - DRM_INST_HANDLER, - DRM_UNINST_HANDLER - } func; - int irq; -}; - -/* - * Type of memory to map. - */ -enum drm_map_type { - _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ - _DRM_REGISTERS = 1, /**< no caching, no core dump */ - _DRM_SHM = 2, /**< shared, cached */ - _DRM_AGP = 3, /**< AGP/GART */ - _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ - _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ -}; - -/* - * Memory mapping flags. 
- */ -enum drm_map_flags { - _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ - _DRM_READ_ONLY = 0x02, - _DRM_LOCKED = 0x04, /**< shared, cached, locked */ - _DRM_KERNEL = 0x08, /**< kernel requires access */ - _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ - _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ - _DRM_REMOVABLE = 0x40, /**< Removable mapping */ - _DRM_DRIVER = 0x80 /**< Managed by driver */ -}; - -struct drm_ctx_priv_map { - unsigned int ctx_id; /**< Context requesting private mapping */ - void *handle; /**< Handle of map */ -}; - -/* - * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls - * argument type. - * - * \sa drmAddMap(). - */ -struct drm_map { - unsigned long offset; /**< Requested physical address (0 for SAREA)*/ - unsigned long size; /**< Requested physical size (bytes) */ - enum drm_map_type type; /**< Type of memory to map */ - enum drm_map_flags flags; /**< Flags */ - void *handle; /**< User-space: "Handle" to pass to mmap() */ - /**< Kernel-space: kernel-virtual address */ - int mtrr; /**< MTRR slot used */ - /* Private data */ -}; - -/* - * DRM_IOCTL_GET_CLIENT ioctl argument type. - */ -struct drm_client { - int idx; /**< Which client desired? */ - int auth; /**< Is client authenticated? 
*/ - unsigned long pid; /**< Process ID */ - unsigned long uid; /**< User ID */ - unsigned long magic; /**< Magic */ - unsigned long iocs; /**< Ioctl count */ -}; - -enum drm_stat_type { - _DRM_STAT_LOCK, - _DRM_STAT_OPENS, - _DRM_STAT_CLOSES, - _DRM_STAT_IOCTLS, - _DRM_STAT_LOCKS, - _DRM_STAT_UNLOCKS, - _DRM_STAT_VALUE, /**< Generic value */ - _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ - _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ - - _DRM_STAT_IRQ, /**< IRQ */ - _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ - _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ - _DRM_STAT_DMA, /**< DMA */ - _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ - _DRM_STAT_MISSED /**< Missed DMA opportunity */ - /* Add to the *END* of the list */ -}; - -/* - * DRM_IOCTL_GET_STATS ioctl argument type. - */ -struct drm_stats { - unsigned long count; - struct { - unsigned long value; - enum drm_stat_type type; - } data[15]; -}; - -/* - * Hardware locking flags. - */ -enum drm_lock_flags { - _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ - _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ - _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ - _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ - /* These *HALT* flags aren't supported yet - -- they will be used to support the - full-screen DGA-like mode. */ - _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ - _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ -}; - -/* - * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. - * - * \sa drmGetLock() and drmUnlock(). - */ -struct drm_lock { - int context; - enum drm_lock_flags flags; -}; - -/* - * DMA flags - * - * \warning - * These values \e must match xf86drm.h. - * - * \sa drm_dma. - */ -enum drm_dma_flags { - /* Flags for DMA buffer dispatch */ - _DRM_DMA_BLOCK = 0x01, /**< - * Block until buffer dispatched. 
- * - * \note The buffer may not yet have - * been processed by the hardware -- - * getting a hardware lock with the - * hardware quiescent will ensure - * that the buffer has been - * processed. - */ - _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ - _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ - - /* Flags for DMA buffer request */ - _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ - _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ - _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ -}; - -/* - * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. - * - * \sa drmAddBufs(). - */ -struct drm_buf_desc { - int count; /**< Number of buffers of this size */ - int size; /**< Size in bytes */ - int low_mark; /**< Low water mark */ - int high_mark; /**< High water mark */ - enum { - _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ - _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ - _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ - _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ - _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ - } flags; - unsigned long agp_start; /**< - * Start address of where the AGP buffers are - * in the AGP aperture - */ -}; - -/* - * DRM_IOCTL_INFO_BUFS ioctl argument type. - */ -struct drm_buf_info { - int count; /**< Entries in list */ - struct drm_buf_desc *list; -}; - -/* - * DRM_IOCTL_FREE_BUFS ioctl argument type. - */ -struct drm_buf_free { - int count; - int *list; -}; - -/* - * Buffer information - * - * \sa drm_buf_map. - */ -struct drm_buf_pub { - int idx; /**< Index into the master buffer list */ - int total; /**< Buffer size */ - int used; /**< Amount of buffer in use (for DMA) */ - void *address; /**< Address of buffer */ -}; - -/* - * DRM_IOCTL_MAP_BUFS ioctl argument type. 
- */ -struct drm_buf_map { - int count; /**< Length of the buffer list */ -#ifdef __cplusplus - void *virt; -#else - void *virtual; /**< Mmap'd area in user-virtual */ -#endif - struct drm_buf_pub *list; /**< Buffer information */ -}; - -/* - * DRM_IOCTL_DMA ioctl argument type. - * - * Indices here refer to the offset into the buffer list in drm_buf_get. - * - * \sa drmDMA(). - */ -struct drm_dma { - int context; /**< Context handle */ - int send_count; /**< Number of buffers to send */ - int *send_indices; /**< List of handles to buffers */ - int *send_sizes; /**< Lengths of data to send */ - enum drm_dma_flags flags; /**< Flags */ - int request_count; /**< Number of buffers requested */ - int request_size; /**< Desired size for buffers */ - int *request_indices; /**< Buffer information */ - int *request_sizes; - int granted_count; /**< Number of buffers granted */ -}; - -enum drm_ctx_flags { - _DRM_CONTEXT_PRESERVED = 0x01, - _DRM_CONTEXT_2DONLY = 0x02 -}; - -/* - * DRM_IOCTL_ADD_CTX ioctl argument type. - * - * \sa drmCreateContext() and drmDestroyContext(). - */ -struct drm_ctx { - drm_context_t handle; - enum drm_ctx_flags flags; -}; - -/* - * DRM_IOCTL_RES_CTX ioctl argument type. - */ -struct drm_ctx_res { - int count; - struct drm_ctx *contexts; -}; - -/* - * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. - */ -struct drm_draw { - drm_drawable_t handle; -}; - -/* - * DRM_IOCTL_UPDATE_DRAW ioctl argument type. - */ -typedef enum { - DRM_DRAWABLE_CLIPRECTS -} drm_drawable_info_type_t; - -struct drm_update_draw { - drm_drawable_t handle; - unsigned int type; - unsigned int num; - unsigned long long data; -}; - -/* - * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. - */ -struct drm_auth { - drm_magic_t magic; -}; - -/* - * DRM_IOCTL_IRQ_BUSID ioctl argument type. - * - * \sa drmGetInterruptFromBusID(). 
- */ -struct drm_irq_busid { - int irq; /**< IRQ number */ - int busnum; /**< bus number */ - int devnum; /**< device number */ - int funcnum; /**< function number */ -}; - -enum drm_vblank_seq_type { - _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ - _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ - /* bits 1-6 are reserved for high crtcs */ - _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, - _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ - _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ - _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ - _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ - _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ -}; -#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 - -#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) -#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ - _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) - -struct drm_wait_vblank_request { - enum drm_vblank_seq_type type; - unsigned int sequence; - unsigned long signal; -}; - -struct drm_wait_vblank_reply { - enum drm_vblank_seq_type type; - unsigned int sequence; - long tval_sec; - long tval_usec; -}; - -/* - * DRM_IOCTL_WAIT_VBLANK ioctl argument type. - * - * \sa drmWaitVBlank(). - */ -union drm_wait_vblank { - struct drm_wait_vblank_request request; - struct drm_wait_vblank_reply reply; -}; - -#define _DRM_PRE_MODESET 1 -#define _DRM_POST_MODESET 2 - -/* - * DRM_IOCTL_MODESET_CTL ioctl argument type - * - * \sa drmModesetCtl(). - */ -struct drm_modeset_ctl { - __u32 crtc; - __u32 cmd; -}; - -/* - * DRM_IOCTL_AGP_ENABLE ioctl argument type. - * - * \sa drmAgpEnable(). - */ -struct drm_agp_mode { - unsigned long mode; /**< AGP mode */ -}; - -/* - * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. - * - * \sa drmAgpAlloc() and drmAgpFree(). 
- */ -struct drm_agp_buffer { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for binding / unbinding */ - unsigned long type; /**< Type of memory to allocate */ - unsigned long physical; /**< Physical used by i810 */ -}; - -/* - * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. - * - * \sa drmAgpBind() and drmAgpUnbind(). - */ -struct drm_agp_binding { - unsigned long handle; /**< From drm_agp_buffer */ - unsigned long offset; /**< In bytes -- will round to page boundary */ -}; - -/* - * DRM_IOCTL_AGP_INFO ioctl argument type. - * - * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), - * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), - * drmAgpVendorId() and drmAgpDeviceId(). - */ -struct drm_agp_info { - int agp_version_major; - int agp_version_minor; - unsigned long mode; - unsigned long aperture_base; /* physical address */ - unsigned long aperture_size; /* bytes */ - unsigned long memory_allowed; /* bytes */ - unsigned long memory_used; - - /* PCI information */ - unsigned short id_vendor; - unsigned short id_device; -}; - -/* - * DRM_IOCTL_SG_ALLOC ioctl argument type. - */ -struct drm_scatter_gather { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for mapping / unmapping */ -}; - -/* - * DRM_IOCTL_SET_VERSION ioctl argument type. - */ -struct drm_set_version { - int drm_di_major; - int drm_di_minor; - int drm_dd_major; - int drm_dd_minor; -}; - -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ -struct drm_gem_close { - /** Handle of the object to be closed. 
*/ - __u32 handle; - __u32 pad; -}; - -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ -struct drm_gem_flink { - /** Handle for the object being named */ - __u32 handle; - - /** Returned global name */ - __u32 name; -}; - -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ -struct drm_gem_open { - /** Name of object being opened */ - __u32 name; - - /** Returned handle for the object */ - __u32 handle; - - /** Returned size of the object */ - __u64 size; -}; - -/** - * DRM_CAP_DUMB_BUFFER - * - * If set to 1, the driver supports creating dumb buffers via the - * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. - */ -#define DRM_CAP_DUMB_BUFFER 0x1 -/** - * DRM_CAP_VBLANK_HIGH_CRTC - * - * If set to 1, the kernel supports specifying a :ref:`CRTC index` - * in the high bits of &drm_wait_vblank_request.type. - * - * Starting kernel version 2.6.39, this capability is always set to 1. - */ -#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 -/** - * DRM_CAP_DUMB_PREFERRED_DEPTH - * - * The preferred bit depth for dumb buffers. - * - * The bit depth is the number of bits used to indicate the color of a single - * pixel excluding any padding. This is different from the number of bits per - * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per - * pixel. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 -/** - * DRM_CAP_DUMB_PREFER_SHADOW - * - * If set to 1, the driver prefers userspace to render to a shadow buffer - * instead of directly rendering to a dumb buffer. For best speed, userspace - * should do streaming ordered memory copies into the dumb buffer and never - * read from it. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 -/** - * DRM_CAP_PRIME - * - * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT - * and &DRM_PRIME_CAP_EXPORT. 
- * - * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and - * &DRM_PRIME_CAP_EXPORT are always advertised. - * - * PRIME buffers are exposed as dma-buf file descriptors. - * See :ref:`prime_buffer_sharing`. - */ -#define DRM_CAP_PRIME 0x5 -/** - * DRM_PRIME_CAP_IMPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME - * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_IMPORT 0x1 -/** - * DRM_PRIME_CAP_EXPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME - * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_EXPORT 0x2 -/** - * DRM_CAP_TIMESTAMP_MONOTONIC - * - * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in - * struct drm_event_vblank. If set to 1, the kernel will report timestamps with - * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these - * clocks. - * - * Starting from kernel version 2.6.39, the default value for this capability - * is 1. Starting kernel version 4.15, this capability is always set to 1. - */ -#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 -/** - * DRM_CAP_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy - * page-flips. - */ -#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/** - * DRM_CAP_CURSOR_WIDTH - * - * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid - * width x height combination for the hardware cursor. The intention is that a - * hardware agnostic userspace can query a cursor plane size to use. - * - * Note that the cross-driver contract is to merely return a valid size; - * drivers are free to attach another meaning on top, eg. i915 returns the - * maximum plane size. 
- */ -#define DRM_CAP_CURSOR_WIDTH 0x8 -/** - * DRM_CAP_CURSOR_HEIGHT - * - * See &DRM_CAP_CURSOR_WIDTH. - */ -#define DRM_CAP_CURSOR_HEIGHT 0x9 -/** - * DRM_CAP_ADDFB2_MODIFIERS - * - * If set to 1, the driver supports supplying modifiers in the - * &DRM_IOCTL_MODE_ADDFB2 ioctl. - */ -#define DRM_CAP_ADDFB2_MODIFIERS 0x10 -/** - * DRM_CAP_PAGE_FLIP_TARGET - * - * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and - * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in - * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP - * ioctl. - */ -#define DRM_CAP_PAGE_FLIP_TARGET 0x11 -/** - * DRM_CAP_CRTC_IN_VBLANK_EVENT - * - * If set to 1, the kernel supports reporting the CRTC ID in - * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and - * &DRM_EVENT_FLIP_COMPLETE events. - * - * Starting kernel version 4.12, this capability is always set to 1. - */ -#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 -/** - * DRM_CAP_SYNCOBJ - * - * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. - */ -#define DRM_CAP_SYNCOBJ 0x13 -/** - * DRM_CAP_SYNCOBJ_TIMELINE - * - * If set to 1, the driver supports timeline operations on sync objects. See - * :ref:`drm_sync_objects`. - */ -#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** - * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic - * commits. - */ -#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 - -/* DRM_IOCTL_GET_CAP ioctl argument type */ -struct drm_get_cap { - __u64 capability; - __u64 value; -}; - -/** - * DRM_CLIENT_CAP_STEREO_3D - * - * If set to 1, the DRM core will expose the stereo 3D capabilities of the - * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. - * - * This capability is always supported for all drivers starting from kernel - * version 3.13. 
- */ -#define DRM_CLIENT_CAP_STEREO_3D 1 - -/** - * DRM_CLIENT_CAP_UNIVERSAL_PLANES - * - * If set to 1, the DRM core will expose all planes (overlay, primary, and - * cursor) to userspace. - * - * This capability has been introduced in kernel version 3.15. Starting from - * kernel version 3.17, this capability is always supported for all drivers. - */ -#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 - -/** - * DRM_CLIENT_CAP_ATOMIC - * - * If set to 1, the DRM core will expose atomic properties to userspace. This - * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and - * &DRM_CLIENT_CAP_ASPECT_RATIO. - * - * If the driver doesn't support atomic mode-setting, enabling this capability - * will fail with -EOPNOTSUPP. - * - * This capability has been introduced in kernel version 4.0. Starting from - * kernel version 4.2, this capability is always supported for atomic-capable - * drivers. - */ -#define DRM_CLIENT_CAP_ATOMIC 3 - -/** - * DRM_CLIENT_CAP_ASPECT_RATIO - * - * If set to 1, the DRM core will provide aspect ratio information in modes. - * See ``DRM_MODE_FLAG_PIC_AR_*``. - * - * This capability is always supported for all drivers starting from kernel - * version 4.18. - */ -#define DRM_CLIENT_CAP_ASPECT_RATIO 4 - -/** - * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS - * - * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. The client must enable - * &DRM_CLIENT_CAP_ATOMIC first. - * - * This capability is always supported for atomic-capable drivers starting from - * kernel version 4.19. - */ -#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 - -/** - * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT - * - * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and - * virtualbox) have additional restrictions for cursor planes (thus - * making cursor planes on those drivers not truly universal,) e.g. 
- * they need cursor planes to act like one would expect from a mouse - * cursor and have correctly set hotspot properties. - * If this client cap is not set the DRM core will hide cursor plane on - * those virtualized drivers because not setting it implies that the - * client is not capable of dealing with those extra restictions. - * Clients which do set cursor hotspot and treat the cursor plane - * like a mouse cursor should set this property. - * The client must enable &DRM_CLIENT_CAP_ATOMIC first. - * - * Setting this property on drivers which do not special case - * cursor planes (i.e. non-virtualized drivers) will return - * EOPNOTSUPP, which can be used by userspace to gauge - * requirements of the hardware/drivers they're running on. - * - * This capability is always supported for atomic-capable virtualized - * drivers starting from kernel version 6.6. - */ -#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 - -/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ -struct drm_set_client_cap { - __u64 capability; - __u64 value; -}; - -#define DRM_RDWR O_RDWR -#define DRM_CLOEXEC O_CLOEXEC -struct drm_prime_handle { - __u32 handle; - - /** Flags.. 
only applicable for handle->fd */ - __u32 flags; - - /** Returned dmabuf file descriptor */ - __s32 fd; -}; - -struct drm_syncobj_create { - __u32 handle; -#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) - __u32 flags; -}; - -struct drm_syncobj_destroy { - __u32 handle; - __u32 pad; -}; - -#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) -#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) -struct drm_syncobj_handle { - __u32 handle; - __u32 flags; - - __s32 fd; - __u32 pad; -}; - -struct drm_syncobj_transfer { - __u32 src_handle; - __u32 dst_handle; - __u64 src_point; - __u64 dst_point; - __u32 flags; - __u32 pad; -}; - -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ -struct drm_syncobj_wait { - __u64 handles; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. - */ - __u64 deadline_nsec; -}; - -struct drm_syncobj_timeline_wait { - __u64 handles; - /* wait on specific timeline point for every handles*/ - __u64 points; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. - */ - __u64 deadline_nsec; -}; - -/** - * struct drm_syncobj_eventfd - * @handle: syncobj handle. 
- * @flags: Zero to wait for the point to be signalled, or - * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be - * available for the point. - * @point: syncobj timeline point (set to zero for binary syncobjs). - * @fd: Existing eventfd to sent events to. - * @pad: Must be zero. - * - * Register an eventfd to be signalled by a syncobj. The eventfd counter will - * be incremented by one. - */ -struct drm_syncobj_eventfd { - __u32 handle; - __u32 flags; - __u64 point; - __s32 fd; - __u32 pad; -}; - - -struct drm_syncobj_array { - __u64 handles; - __u32 count_handles; - __u32 pad; -}; - -#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ -struct drm_syncobj_timeline_array { - __u64 handles; - __u64 points; - __u32 count_handles; - __u32 flags; -}; - - -/* Query current scanout sequence number */ -struct drm_crtc_get_sequence { - __u32 crtc_id; /* requested crtc_id */ - __u32 active; /* return: crtc output is active */ - __u64 sequence; /* return: most recent vblank sequence */ - __s64 sequence_ns; /* return: most recent time of first pixel out */ -}; - -/* Queue event to be delivered at specified sequence. Time stamp marks - * when the first pixel of the refresh cycle leaves the display engine - * for the display - */ -#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ -#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ - -struct drm_crtc_queue_sequence { - __u32 crtc_id; - __u32 flags; - __u64 sequence; /* on input, target sequence. 
on output, actual sequence */ - __u64 user_data; /* user data passed to event */ -}; - -#if defined(__cplusplus) -} -#endif - -#include "drm_mode.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_IOCTL_BASE 'd' -#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) -#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) -#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) -#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) - -#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) -#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) -#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) -#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) -#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) -#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) -#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) -#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) -#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) -/** - * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. - * - * GEM handles are not reference-counted by the kernel. User-space is - * responsible for managing their lifetime. For example, if user-space imports - * the same memory object twice on the same DRM file description, the same GEM - * handle is returned by both imports, and user-space needs to ensure - * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen - * when a memory object is allocated, then exported and imported again on the - * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception - * and always returns fresh new GEM handles even if an existing GEM handle - * already refers to the same memory object before the IOCTL is performed. 
- */ -#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) -#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) -#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) -#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) -#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) - -#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) -#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) -#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) -#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) -#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) -#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) -#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) -#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) -#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) -#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) -#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) - -#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) - -#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) -#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) - -#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) -#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) - -#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) -#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) -#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) -#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) -#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) -#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) -#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) -#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) -#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) -#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) -#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) -#define DRM_IOCTL_UNLOCK 
DRM_IOW( 0x2b, struct drm_lock) -#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) - -/** - * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. - * - * User-space sets &drm_prime_handle.handle with the GEM handle to export and - * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in - * &drm_prime_handle.fd. - * - * The export can fail for any driver-specific reason, e.g. because export is - * not supported for this specific GEM handle (but might be for others). - * - * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. - */ -#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) -/** - * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. - * - * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to - * import, and gets back a GEM handle in &drm_prime_handle.handle. - * &drm_prime_handle.flags is unused. - * - * If an existing GEM handle refers to the memory object backing the DMA-BUF, - * that GEM handle is returned. Therefore user-space which needs to handle - * arbitrary DMA-BUFs must have a user-space lookup data structure to manually - * reference-count duplicated GEM handles. For more information see - * &DRM_IOCTL_GEM_CLOSE. - * - * The import can fail for any driver-specific reason, e.g. because import is - * only supported for DMA-BUFs allocated on this DRM device. - * - * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. 
- */ -#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) - -#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) -#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) -#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) -#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) -#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) -#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) - -#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) -#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) - -#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) - -#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) -#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) - -#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) - -#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) -#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) -#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) -#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) -#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ -#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ - -#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) -#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct 
drm_mode_connector_set_property) -#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) -#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) -#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) -/** - * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. - * - * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * Warning: removing a framebuffer currently in-use on an enabled plane will - * disable that plane. The CRTC the plane is linked to may also be disabled - * (depending on driver capabilities). - */ -#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) -#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) -#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) - -/** - * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. - * - * KMS dumb buffers provide a very primitive way to allocate a buffer object - * suitable for scanout and map it for software rendering. KMS dumb buffers are - * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb - * buffers are not suitable to be displayed on any other device than the KMS - * device where they were allocated from. Also see - * :ref:`kms_dumb_buffer_objects`. - * - * The IOCTL argument is a struct drm_mode_create_dumb. - * - * User-space is expected to create a KMS dumb buffer via this IOCTL, then add - * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via - * &DRM_IOCTL_MODE_MAP_DUMB. - * - * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. - * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate - * driver preferences for dumb buffers. 
- */ -#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) -#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) -#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) -#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) -#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) -#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) -#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) -#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) -#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) -#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) -#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) -#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) -#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) - -#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) -#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) -#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) -#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) -#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) - -#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) -#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) -#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) -#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) - -#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) 
-#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) -#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) -#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) - -/** - * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. - * - * This queries metadata about a framebuffer. User-space fills - * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the - * struct as the output. - * - * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Fresh new GEM handles are always - * returned, even if another GEM handle referring to the same memory object - * already exists on the DRM file description. The caller is responsible for - * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same - * new handle will be returned for multiple planes in case they use the same - * memory object. Planes are valid until one has a zero handle -- this can be - * used to compute the number of planes. - * - * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid - * until one has a zero &drm_mode_fb_cmd2.pitches. - * - * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set - * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the - * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. - * - * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space - * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately - * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not - * double-close handles which are specified multiple times in the array. - */ -#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) - -#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) - -/** - * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. 
- * - * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable - * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept - * alive. When the plane no longer uses the framebuffer (because the - * framebuffer is replaced with another one, or the plane is disabled), the - * framebuffer is cleaned up. - * - * This is useful to implement flicker-free transitions between two processes. - * - * Depending on the threat model, user-space may want to ensure that the - * framebuffer doesn't expose any sensitive user information: closed - * framebuffers attached to a plane can be read back by the next DRM master. - */ -#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) - -/* - * Device specific ioctls should only be in their respective headers - * The device specific ioctl range is from 0x40 to 0x9f. - * Generic IOCTLS restart at 0xA0. - * - * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and - * drmCommandReadWrite(). - */ -#define DRM_COMMAND_BASE 0x40 -#define DRM_COMMAND_END 0xA0 - -/** - * struct drm_event - Header for DRM events - * @type: event type. - * @length: total number of payload bytes (including header). - * - * This struct is a header for events written back to user-space on the DRM FD. - * A read on the DRM FD will always only return complete events: e.g. if the - * read buffer is 100 bytes large and there are two 64 byte events pending, - * only one will be returned. - * - * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and - * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, - * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. 
- */ -struct drm_event { - __u32 type; - __u32 length; -}; - -/** - * DRM_EVENT_VBLANK - vertical blanking event - * - * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the - * &_DRM_VBLANK_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_VBLANK 0x01 -/** - * DRM_EVENT_FLIP_COMPLETE - page-flip completion event - * - * This event is sent in response to an atomic commit or legacy page-flip with - * the &DRM_MODE_PAGE_FLIP_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_FLIP_COMPLETE 0x02 -/** - * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event - * - * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. - * - * The event payload is a struct drm_event_crtc_sequence. - */ -#define DRM_EVENT_CRTC_SEQUENCE 0x03 - -struct drm_event_vblank { - struct drm_event base; - __u64 user_data; - __u32 tv_sec; - __u32 tv_usec; - __u32 sequence; - __u32 crtc_id; /* 0 on older kernels that do not support this */ -}; - -/* Event delivered at sequence. 
Time stamp marks when the first pixel - * of the refresh cycle leaves the display engine for the display - */ -struct drm_event_crtc_sequence { - struct drm_event base; - __u64 user_data; - __s64 time_ns; - __u64 sequence; -}; - -/* typedef area */ -typedef struct drm_clip_rect drm_clip_rect_t; -typedef struct drm_drawable_info drm_drawable_info_t; -typedef struct drm_tex_region drm_tex_region_t; -typedef struct drm_hw_lock drm_hw_lock_t; -typedef struct drm_version drm_version_t; -typedef struct drm_unique drm_unique_t; -typedef struct drm_list drm_list_t; -typedef struct drm_block drm_block_t; -typedef struct drm_control drm_control_t; -typedef enum drm_map_type drm_map_type_t; -typedef enum drm_map_flags drm_map_flags_t; -typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; -typedef struct drm_map drm_map_t; -typedef struct drm_client drm_client_t; -typedef enum drm_stat_type drm_stat_type_t; -typedef struct drm_stats drm_stats_t; -typedef enum drm_lock_flags drm_lock_flags_t; -typedef struct drm_lock drm_lock_t; -typedef enum drm_dma_flags drm_dma_flags_t; -typedef struct drm_buf_desc drm_buf_desc_t; -typedef struct drm_buf_info drm_buf_info_t; -typedef struct drm_buf_free drm_buf_free_t; -typedef struct drm_buf_pub drm_buf_pub_t; -typedef struct drm_buf_map drm_buf_map_t; -typedef struct drm_dma drm_dma_t; -typedef union drm_wait_vblank drm_wait_vblank_t; -typedef struct drm_agp_mode drm_agp_mode_t; -typedef enum drm_ctx_flags drm_ctx_flags_t; -typedef struct drm_ctx drm_ctx_t; -typedef struct drm_ctx_res drm_ctx_res_t; -typedef struct drm_draw drm_draw_t; -typedef struct drm_update_draw drm_update_draw_t; -typedef struct drm_auth drm_auth_t; -typedef struct drm_irq_busid drm_irq_busid_t; -typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; - -typedef struct drm_agp_buffer drm_agp_buffer_t; -typedef struct drm_agp_binding drm_agp_binding_t; -typedef struct drm_agp_info drm_agp_info_t; -typedef struct drm_scatter_gather drm_scatter_gather_t; -typedef struct 
drm_set_version drm_set_version_t; - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h b/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h deleted file mode 100644 index 9debb320c34..00000000000 --- a/ggml/src/ggml-remotingfrontend/include/drm-uapi/virtgpu_drm.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2013 Red Hat - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef VIRTGPU_DRM_H -#define VIRTGPU_DRM_H - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/* Please note that modifications to all structs defined here are - * subject to backwards-compatibility constraints. 
- * - * Do not use pointers, use __u64 instead for 32 bit / 64 bit user/kernel - * compatibility Keep fields aligned to their size - */ - -#define DRM_VIRTGPU_MAP 0x01 -#define DRM_VIRTGPU_EXECBUFFER 0x02 -#define DRM_VIRTGPU_GETPARAM 0x03 -#define DRM_VIRTGPU_RESOURCE_CREATE 0x04 -#define DRM_VIRTGPU_RESOURCE_INFO 0x05 -#define DRM_VIRTGPU_TRANSFER_FROM_HOST 0x06 -#define DRM_VIRTGPU_TRANSFER_TO_HOST 0x07 -#define DRM_VIRTGPU_WAIT 0x08 -#define DRM_VIRTGPU_GET_CAPS 0x09 -#define DRM_VIRTGPU_RESOURCE_CREATE_BLOB 0x0a -#define DRM_VIRTGPU_CONTEXT_INIT 0x0b - -#define VIRTGPU_EXECBUF_FENCE_FD_IN 0x01 -#define VIRTGPU_EXECBUF_FENCE_FD_OUT 0x02 -#define VIRTGPU_EXECBUF_RING_IDX 0x04 -#define VIRTGPU_EXECBUF_FLAGS (\ - VIRTGPU_EXECBUF_FENCE_FD_IN |\ - VIRTGPU_EXECBUF_FENCE_FD_OUT |\ - VIRTGPU_EXECBUF_RING_IDX |\ - 0) - -struct drm_virtgpu_map { - __u64 offset; /* use for mmap system call */ - __u32 handle; - __u32 pad; -}; - -#define VIRTGPU_EXECBUF_SYNCOBJ_RESET 0x01 -#define VIRTGPU_EXECBUF_SYNCOBJ_FLAGS ( \ - VIRTGPU_EXECBUF_SYNCOBJ_RESET | \ - 0) -struct drm_virtgpu_execbuffer_syncobj { - __u32 handle; - __u32 flags; - __u64 point; -}; - -/* fence_fd is modified on success if VIRTGPU_EXECBUF_FENCE_FD_OUT flag is set. 
*/ -struct drm_virtgpu_execbuffer { - __u32 flags; - __u32 size; - __u64 command; /* void* */ - __u64 bo_handles; - __u32 num_bo_handles; - __s32 fence_fd; /* in/out fence fd (see VIRTGPU_EXECBUF_FENCE_FD_IN/OUT) */ - __u32 ring_idx; /* command ring index (see VIRTGPU_EXECBUF_RING_IDX) */ - __u32 syncobj_stride; /* size of @drm_virtgpu_execbuffer_syncobj */ - __u32 num_in_syncobjs; - __u32 num_out_syncobjs; - __u64 in_syncobjs; - __u64 out_syncobjs; -}; - -#define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */ -#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */ -#define VIRTGPU_PARAM_RESOURCE_BLOB 3 /* DRM_VIRTGPU_RESOURCE_CREATE_BLOB */ -#define VIRTGPU_PARAM_HOST_VISIBLE 4 /* Host blob resources are mappable */ -#define VIRTGPU_PARAM_CROSS_DEVICE 5 /* Cross virtio-device resource sharing */ -#define VIRTGPU_PARAM_CONTEXT_INIT 6 /* DRM_VIRTGPU_CONTEXT_INIT */ -#define VIRTGPU_PARAM_SUPPORTED_CAPSET_IDs 7 /* Bitmask of supported capability set ids */ -#define VIRTGPU_PARAM_EXPLICIT_DEBUG_NAME 8 /* Ability to set debug name from userspace */ - -struct drm_virtgpu_getparam { - __u64 param; - __u64 value; -}; - -/* NO_BO flags? NO resource flag? */ -/* resource flag for y_0_top */ -struct drm_virtgpu_resource_create { - __u32 target; - __u32 format; - __u32 bind; - __u32 width; - __u32 height; - __u32 depth; - __u32 array_size; - __u32 last_level; - __u32 nr_samples; - __u32 flags; - __u32 bo_handle; /* if this is set - recreate a new resource attached to this bo ? 
*/ - __u32 res_handle; /* returned by kernel */ - __u32 size; /* validate transfer in the host */ - __u32 stride; /* validate transfer in the host */ -}; - -struct drm_virtgpu_resource_info { - __u32 bo_handle; - __u32 res_handle; - __u32 size; - __u32 blob_mem; -}; - -struct drm_virtgpu_3d_box { - __u32 x; - __u32 y; - __u32 z; - __u32 w; - __u32 h; - __u32 d; -}; - -struct drm_virtgpu_3d_transfer_to_host { - __u32 bo_handle; - struct drm_virtgpu_3d_box box; - __u32 level; - __u32 offset; - __u32 stride; - __u32 layer_stride; -}; - -struct drm_virtgpu_3d_transfer_from_host { - __u32 bo_handle; - struct drm_virtgpu_3d_box box; - __u32 level; - __u32 offset; - __u32 stride; - __u32 layer_stride; -}; - -#define VIRTGPU_WAIT_NOWAIT 1 /* like it */ -struct drm_virtgpu_3d_wait { - __u32 handle; /* 0 is an invalid handle */ - __u32 flags; -}; - -#define VIRTGPU_DRM_CAPSET_VIRGL 1 -#define VIRTGPU_DRM_CAPSET_VIRGL2 2 -#define VIRTGPU_DRM_CAPSET_GFXSTREAM_VULKAN 3 -#define VIRTGPU_DRM_CAPSET_VENUS 4 -#define VIRTGPU_DRM_CAPSET_CROSS_DOMAIN 5 -#define VIRTGPU_DRM_CAPSET_DRM 6 -struct drm_virtgpu_get_caps { - __u32 cap_set_id; - __u32 cap_set_ver; - __u64 addr; - __u32 size; - __u32 pad; -}; - -struct drm_virtgpu_resource_create_blob { -#define VIRTGPU_BLOB_MEM_GUEST 0x0001 -#define VIRTGPU_BLOB_MEM_HOST3D 0x0002 -#define VIRTGPU_BLOB_MEM_HOST3D_GUEST 0x0003 - -#define VIRTGPU_BLOB_FLAG_USE_MAPPABLE 0x0001 -#define VIRTGPU_BLOB_FLAG_USE_SHAREABLE 0x0002 -#define VIRTGPU_BLOB_FLAG_USE_CROSS_DEVICE 0x0004 - /* zero is invalid blob_mem */ - __u32 blob_mem; - __u32 blob_flags; - __u32 bo_handle; - __u32 res_handle; - __u64 size; - - /* - * for 3D contexts with VIRTGPU_BLOB_MEM_HOST3D_GUEST and - * VIRTGPU_BLOB_MEM_HOST3D otherwise, must be zero. 
- */ - __u32 pad; - __u32 cmd_size; - __u64 cmd; - __u64 blob_id; -}; - -#define VIRTGPU_CONTEXT_PARAM_CAPSET_ID 0x0001 -#define VIRTGPU_CONTEXT_PARAM_NUM_RINGS 0x0002 -#define VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK 0x0003 -#define VIRTGPU_CONTEXT_PARAM_DEBUG_NAME 0x0004 -struct drm_virtgpu_context_set_param { - __u64 param; - __u64 value; -}; - -struct drm_virtgpu_context_init { - __u32 num_params; - __u32 pad; - - /* pointer to drm_virtgpu_context_set_param array */ - __u64 ctx_set_params; -}; - -/* - * Event code that's given when VIRTGPU_CONTEXT_PARAM_POLL_RINGS_MASK is in - * effect. The event size is sizeof(drm_event), since there is no additional - * payload. - */ -#define VIRTGPU_EVENT_FENCE_SIGNALED 0x90000000 - -#define DRM_IOCTL_VIRTGPU_MAP \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_MAP, struct drm_virtgpu_map) - -#define DRM_IOCTL_VIRTGPU_EXECBUFFER \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_EXECBUFFER,\ - struct drm_virtgpu_execbuffer) - -#define DRM_IOCTL_VIRTGPU_GETPARAM \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GETPARAM,\ - struct drm_virtgpu_getparam) - -#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE, \ - struct drm_virtgpu_resource_create) - -#define DRM_IOCTL_VIRTGPU_RESOURCE_INFO \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_INFO, \ - struct drm_virtgpu_resource_info) - -#define DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_FROM_HOST, \ - struct drm_virtgpu_3d_transfer_from_host) - -#define DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_TRANSFER_TO_HOST, \ - struct drm_virtgpu_3d_transfer_to_host) - -#define DRM_IOCTL_VIRTGPU_WAIT \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, \ - struct drm_virtgpu_3d_wait) - -#define DRM_IOCTL_VIRTGPU_GET_CAPS \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, \ - struct drm_virtgpu_get_caps) - -#define DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB \ - 
DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_RESOURCE_CREATE_BLOB, \ - struct drm_virtgpu_resource_create_blob) - -#define DRM_IOCTL_VIRTGPU_CONTEXT_INIT \ - DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_CONTEXT_INIT, \ - struct drm_virtgpu_context_init) - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index 763fbaa7ea7..47897527f6c 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -256,10 +256,10 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) { static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) { if (gpu->use_apir_capset) { GGML_LOG_INFO("Using the APIR capset"); - gpu->capset.id = VIRGL_RENDERER_CAPSET_APIR; + gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR; } else { GGML_LOG_INFO("Using the Venus capset"); - gpu->capset.id = VIRGL_RENDERER_CAPSET_VENUS; + gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS; } gpu->capset.version = 0; diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h b/ggml/src/ggml-remotingfrontend/virtgpu.h index 00f0a820b14..b3dccd4de19 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu.h @@ -19,23 +19,26 @@ #define VIRGL_RENDERER_UNSTABLE_APIS 1 #include "apir_hw.h" -#include "drm-uapi/virtgpu_drm.h" +#include #include "venus_hw.h" -// must match https://gitlab.freedesktop.org/kpouget/virglrenderer/-/blob/main/src/virglrenderer_hw.h?ref_type=heads -enum virgl_renderer_capset { - VIRGL_RENDERER_CAPSET_VIRGL = 1, - VIRGL_RENDERER_CAPSET_VIRGL2 = 2, - /* 3 is reserved for gfxstream */ - VIRGL_RENDERER_CAPSET_VENUS = 4, - /* 5 is reserved for cross-domain */ - VIRGL_RENDERER_CAPSET_DRM = 6, - - VIRGL_RENDERER_CAPSET_APIR = 10, -}; +#ifndef VIRTGPU_DRM_CAPSET_APIR +// Will be defined include/drm/virtgpu_drm.h when +// https://gitlab.freedesktop.org/virgl/virglrenderer/-/merge_requests/1590/diffs +// is merged +#define VIRTGPU_DRM_CAPSET_APIR 
10 +#endif +// Mesa/Virlgrenderer Venus internal. Only necessary during the +// Venus->APIR transition in Virglrenderer #define VENUS_COMMAND_TYPE_LENGTH 331 +#ifndef VIRTGPU_DRM_CAPSET_VENUS // only available with Linux >= v6.16 +#define VIRTGPU_DRM_CAPSET_VENUS 4 +#endif + +typedef uint32_t virgl_renderer_capset; + /* from src/virtio/vulkan/vn_renderer_virtgpu.c */ #define VIRTGPU_PCI_VENDOR_ID 0x1af4 #define VIRTGPU_PCI_DEVICE_ID 0x1050 From 700884b57c21d2ca334d7db36c024d62057ce06b Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 09:55:36 +0100 Subject: [PATCH 14/37] Make sure that the LOG messages end with EOL --- .../backend-dispatched-backend.cpp | 4 +- .../backend-dispatched-buffer.cpp | 8 +-- .../backend-dispatched-device.cpp | 2 +- .../backend-dispatched.cpp | 6 +- ggml/src/ggml-remotingbackend/backend.cpp | 18 ++--- .../src/ggml-remotingbackend/shared/apir_cs.h | 10 +-- .../shared/apir_cs_ggml.h | 2 +- .../apir_cs_ggml-rpc-front.cpp | 2 +- .../ggml-backend-reg.cpp | 12 ++-- .../virtgpu-forward-backend.cpp | 2 +- .../virtgpu-forward-buffer-type.cpp | 2 +- .../virtgpu-forward-buffer.cpp | 4 +- .../virtgpu-forward-device.cpp | 6 +- .../virtgpu-forward-impl.h | 8 +-- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 65 +++++++++---------- 15 files changed, 75 insertions(+), 76 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index f61b113b2e5..595d60c6f78 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -27,7 +27,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v const void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); apir_decoder_set_fatal(dec); return 1; 
} @@ -45,7 +45,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v if (dev->iface.supports_op(dev, op)) { continue; } - GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend :/", idx, ggml_op_desc(op)); + GGML_LOG_ERROR("Graph node %d (%s) not supported by the backend\n", idx, ggml_op_desc(op)); status = GGML_STATUS_ABORTED; apir_encode_ggml_status(enc, &status); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index de68e583ddf..87dc74294cc 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -40,7 +40,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl :/"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); return 1; } @@ -71,7 +71,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_data) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl :/"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); return 1; } @@ -85,7 +85,7 @@ uint32_t backend_buffer_cpy_tensor(apir_encoder * enc, apir_decoder * dec, virgl ggml_backend_buffer_t buffer; buffer = apir_decode_ggml_buffer(dec); - GGML_LOG_INFO("%s <---->", __func__); + const ggml_tensor * src; // safe to remove the const qualifier here src = apir_decode_ggml_tensor(dec); @@ -121,7 +121,7 @@ uint32_t backend_buffer_free_buffer(apir_encoder * enc, apir_decoder * dec, virg buffer = apir_decode_ggml_buffer(dec); if (!apir_untrack_backend_buffer(buffer)) { - GGML_LOG_WARN("%s: unknown buffer %p", __func__, (void *) buffer); + GGML_LOG_WARN("%s: unknown buffer %p\n", __func__, 
(void *) buffer); return 1; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index c314c2a30bf..e91f3f98fef 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -124,7 +124,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, void * shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); if (!shmem_ptr) { - GGML_LOG_ERROR("Couldn't get the shmem addr from virgl"); + GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); apir_decoder_set_fatal(dec); return 1; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index 792443c9a95..17b988f3b41 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -17,14 +17,14 @@ long long timer_count = 0; uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { if (reg != NULL) { - GGML_LOG_WARN("%s: already initialized :/", __func__); + GGML_LOG_WARN("%s: already initialized\n", __func__); return APIR_BACKEND_INITIALIZE_ALREADY_INITED; } ggml_backend_reg_t (*ggml_backend_reg_fct)(void) = (ggml_backend_reg_t (*)()) ggml_backend_reg_fct_p; reg = ggml_backend_reg_fct(); if (reg == NULL) { - GGML_LOG_ERROR("%s: backend registration failed :/", __func__); + GGML_LOG_ERROR("%s: backend registration failed\n", __func__); return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } @@ -36,7 +36,7 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_ bck = ggml_backend_fct(0); if (!bck) { - GGML_LOG_ERROR("%s: backend initialization failed :/", __func__); + GGML_LOG_ERROR("%s: backend initialization failed\n", __func__); return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; } diff --git 
a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index e4069101cdd..d94c24c2a20 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -35,11 +35,11 @@ void apir_backend_deinit(void) { if (dev) { size_t free, total; dev->iface.get_memory(dev, &free, &total); - GGML_LOG_INFO("%s: free memory: %ld MB", __func__, (size_t) free / 1024 / 1024); + GGML_LOG_INFO("%s: free memory: %ld MB\n", __func__, (size_t) free / 1024 / 1024); } if (backend_library_handle) { - GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.", __func__); + GGML_LOG_INFO("%s: The GGML backend library was loaded. Unloading it.\n", __func__); dlclose(backend_library_handle); backend_library_handle = NULL; } @@ -59,14 +59,14 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { if (apir_logfile) { ggml_log_set(log_to_file_callback, apir_logfile); } else { - GGML_LOG_INFO("Could not open the log file at '%s'", apir_log_to_file); + GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file); } } const char * library_name = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); const char * library_reg = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); const char * library_init = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV); - GGML_LOG_INFO("%s: loading %s (%s|%s)", __func__, library_name, library_reg, library_init); + GGML_LOG_INFO("%s: loading %s (%s|%s)\n", __func__, library_name, library_reg, library_init); if (!library_name) { GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); @@ -77,13 +77,13 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { backend_library_handle = dlopen(library_name, RTLD_LAZY); if (!backend_library_handle) { - GGML_LOG_ERROR("cannot open the GGML library: %s", dlerror()); + GGML_LOG_ERROR("cannot open the GGML library: %s\n", dlerror()); return APIR_LOAD_LIBRARY_CANNOT_OPEN; } if (!library_reg) { - 
GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); + GGML_LOG_ERROR("cannot register the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -91,14 +91,14 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { void * ggml_backend_reg_fct = dlsym(backend_library_handle, library_reg); dlsym_error = dlerror(); if (dlsym_error) { - GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s", library_reg, + GGML_LOG_ERROR("cannot find the GGML backend registration symbol '%s' (from %s): %s\n", library_reg, APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV, dlsym_error); return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } if (!library_init) { - GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined", library_init); + GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined\n", library_init); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } @@ -106,7 +106,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { void * ggml_backend_init_fct = dlsym(backend_library_handle, library_init); dlsym_error = dlerror(); if (dlsym_error) { - GGML_LOG_ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s", library_init, + GGML_LOG_ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s\n", library_init, APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV, dlsym_error); return APIR_LOAD_LIBRARY_SYMBOL_MISSING; diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-remotingbackend/shared/apir_cs.h index c8e8f96f86c..27a61091ffd 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs.h @@ -86,7 +86,7 @@ static inline bool apir_decoder_peek_internal(apir_decoder * dec, assert(val_size <= size); if (unlikely(size > (size_t) (dec->end - dec->cur))) { - GGML_LOG_ERROR("reading too much from the decoder ..."); + 
GGML_LOG_ERROR("reading too much from the decoder ...\n"); apir_decoder_set_fatal(dec); memset(val, 0, val_size); return false; @@ -103,7 +103,7 @@ static inline void apir_decoder_peek(apir_decoder * dec, size_t size, void * val static inline const void * apir_decoder_use_inplace(apir_decoder * dec, size_t size) { if (unlikely(size > (size_t) (dec->end - dec->cur))) { - GGML_LOG_ERROR("reading too much from the decoder ..."); + GGML_LOG_ERROR("reading too much from the decoder ...\n"); apir_decoder_set_fatal(dec); return NULL; } @@ -221,7 +221,7 @@ static inline uint64_t apir_decode_array_size(apir_decoder * dec, uint64_t expec uint64_t size; apir_decode_uint64_t(dec, &size); if (size != expected_size) { - GGML_LOG_ERROR("Couldn't decode array from the decoder"); + GGML_LOG_ERROR("Couldn't decode array from the decoder\n"); apir_decoder_set_fatal(dec); size = 0; } @@ -322,7 +322,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t if (size) { val[size - 1] = '\0'; } else { - GGML_LOG_ERROR("Couldn't decode the blog array"); + GGML_LOG_ERROR("Couldn't decode the blog array\n"); apir_decoder_set_fatal(dec); } } @@ -332,7 +332,7 @@ static inline void apir_decode_char_array(apir_decoder * dec, char * val, size_t static inline void * apir_decoder_alloc_array(size_t size, size_t count) { size_t alloc_size; if (unlikely(__builtin_mul_overflow(size, count, &alloc_size))) { - GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes", size, count); + GGML_LOG_ERROR("overflow in array allocation of %zu * %zu bytes\n", size, count); return NULL; } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h index 40898807446..070c3b25fb1 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h @@ -160,7 +160,7 @@ static inline void apir_encode_ggml_tensor_inline(apir_encoder * enc, const ggml if (tensor->src[0] && 
tensor->buffer) { static int first = 1; if (first) { - GGML_LOG_WARN("Cannot pass tensors with src and buffer"); + GGML_LOG_WARN("Cannot pass tensors with src and buffer\n"); first = 0; } } diff --git a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp index 2b01ffd3e67..7def3d1a621 100644 --- a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp +++ b/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp @@ -36,7 +36,7 @@ apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) { result.data = reinterpret_cast(tensor->data); if (tensor->data) { if (!tensor->buffer) { - GGML_ABORT("tensor has data but not buffer :/"); + GGML_ABORT("tensor has data but not buffer"); } // tensor->data is serialized as an offset to the buffer base address result.data -= reinterpret_cast(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base); diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index 1e246506741..2bf8de770cf 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -13,7 +13,7 @@ static virtgpu * apir_initialize() { apir_gpu_instance = create_virtgpu(); if (!apir_gpu_instance) { - GGML_ABORT("failed to initialize the virtgpu :/"); + GGML_ABORT("failed to initialize the virtgpu"); } apir_initialized = true; @@ -24,7 +24,7 @@ static virtgpu * apir_initialize() { static int ggml_backend_remoting_get_device_count() { virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_WARN("apir_initialize failed :/"); + GGML_LOG_WARN("apir_initialize failed\n"); return 0; } @@ -46,13 +46,13 @@ ggml_backend_dev_t ggml_backend_remoting_get_device(size_t device) { static void ggml_backend_remoting_reg_init_devices(ggml_backend_reg_t reg) { if (devices.size() > 0) { - GGML_LOG_INFO("%s: already initialized", __func__); + GGML_LOG_INFO("%s: already initialized\n", __func__); 
return; } virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_ERROR("apir_initialize failed :/"); + GGML_LOG_ERROR("apir_initialize failed\n"); return; } @@ -105,7 +105,7 @@ static const ggml_backend_reg_i ggml_backend_remoting_reg_i = { ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_ERROR("apir_initialize failed :/"); + GGML_LOG_ERROR("apir_initialize failed\n"); return NULL; } @@ -123,7 +123,7 @@ ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { ggml_backend_remoting_reg_init_devices(®); - GGML_LOG_INFO("%s: initialzed", __func__); + GGML_LOG_INFO("%s: initialzed\n", __func__); return ® } diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp index df7070edfdb..bf3c41011ac 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp @@ -23,7 +23,7 @@ ggml_status apir_backend_graph_compute(virtgpu * gpu, ggml_cgraph * cgraph) { // prefer the init-time allocated page, if large enough shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, cgraph_size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp index 34ad26fcd2d..03cb09e0643 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp @@ -14,7 +14,7 @@ const char * apir_buffer_type_get_name(virtgpu * gpu, ggml_backend_buffer_type_t const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - 
GGML_LOG_ERROR("%s: Could not allocate the device name buffer", __func__); + GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); apir_decoder_set_fatal(decoder); } apir_decode_char_array(decoder, string, string_size); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp index 716459c8127..bea349a9032 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp @@ -42,7 +42,7 @@ void apir_buffer_set_tensor(virtgpu * gpu, shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer"); } memcpy(shmem->mmap_ptr, data, size); @@ -96,7 +96,7 @@ void apir_buffer_get_tensor(virtgpu * gpu, shmem = &gpu->data_shmem; } else if (virtgpu_shmem_create(gpu, size, shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer"); } apir_encode_virtgpu_shmem_res_id(encoder, shmem->res_id); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp index 88b744572ee..3e45e55bdcb 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp @@ -36,7 +36,7 @@ const char * apir_device_get_name(virtgpu * gpu) { const size_t string_size = apir_decode_array_size_unchecked(decoder); string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - GGML_LOG_ERROR("%s: Could not allocate the device name buffer", __func__); + GGML_LOG_ERROR("%s: Could not allocate the device name buffer\n", __func__); return NULL; } apir_decode_char_array(decoder, string, string_size); @@ -58,7 +58,7 @@ const char * apir_device_get_description(virtgpu * 
gpu) { const size_t string_size = apir_decode_array_size_unchecked(decoder); char * string = (char *) apir_decoder_alloc_array(sizeof(char), string_size); if (!string) { - GGML_LOG_ERROR("%s: Could not allocate the device description buffer", __func__); + GGML_LOG_ERROR("%s: Could not allocate the device description buffer\n", __func__); return NULL; } @@ -181,7 +181,7 @@ apir_buffer_context_t apir_device_buffer_from_ptr(virtgpu * gpu, size_t size, si REMOTE_CALL_PREPARE(gpu, encoder, APIR_COMMAND_TYPE_DEVICE_BUFFER_FROM_PTR); if (virtgpu_shmem_create(gpu, size, &buffer_context.shmem)) { - GGML_ABORT("Couldn't allocate the guest-host shared buffer :/"); + GGML_ABORT("Couldn't allocate the guest-host shared buffer"); } apir_encode_virtgpu_shmem_res_id(encoder, buffer_context.shmem.res_id); diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h index a86e4812657..c4e7bc5bd2d 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h @@ -11,7 +11,7 @@ int32_t forward_flag = (int32_t) apir_command_type__; \ encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ if (!encoder_name) { \ - GGML_ABORT("%s: failed to prepare the remote call encoder :/", __func__); \ + GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \ } \ } while (0) @@ -19,11 +19,11 @@ do { \ ret_name = (ApirForwardReturnCode) remote_call(gpu_dev_name, encoder_name, &decoder_name, 0, NULL); \ if (!decoder_name) { \ - GGML_ABORT("%s: failed to kick the remote call :/", __func__); \ + GGML_ABORT("%s: failed to kick the remote call", __func__); \ } \ if (ret_name < APIR_FORWARD_BASE_INDEX) { \ - GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, apir_forward_error(ret_name), \ - ret_name); \ + GGML_ABORT("%s: failed to forward the API call: %s: code %d", __func__, \ + apir_forward_error(ret_name), 
ret_name); \ } \ ret_name = (ApirForwardReturnCode) (ret_name - APIR_FORWARD_BASE_INDEX); \ } while (0) diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index 47897527f6c..ca04aa09e0e 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -33,7 +33,7 @@ static int virtgpu_handshake(virtgpu * gpu) { encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); if (!encoder) { - GGML_ABORT("%s: failed to prepare the remote call encoder :/", __func__); + GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); return 1; } @@ -65,7 +65,7 @@ static int virtgpu_handshake(virtgpu * gpu) { uint32_t host_minor; if (ret_magic != APIR_HANDSHAKE_MAGIC) { - GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s):/", __func__, ret_magic, + GGML_ABORT("%s: handshake with the virglrenderer failed (code=%d | %s)", __func__, ret_magic, apir_backend_initialize_error(ret_magic)); } else { apir_decode_uint32_t(decoder, &host_major); @@ -78,13 +78,13 @@ static int virtgpu_handshake(virtgpu * gpu) { return 1; } - GGML_LOG_INFO("%s: Guest is running with %u.%u", __func__, guest_major, guest_minor); - GGML_LOG_INFO("%s: Host is running with %u.%u", __func__, host_major, host_minor); + GGML_LOG_INFO("%s: Guest is running with %u.%u\n", __func__, guest_major, guest_minor); + GGML_LOG_INFO("%s: Host is running with %u.%u\n", __func__, host_major, host_minor); if (guest_major != host_major) { - GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ", host_major, guest_major); + GGML_LOG_ERROR("Host major (%d) and guest major (%d) version differ\n", host_major, guest_major); } else if (guest_minor != host_minor) { - GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ", host_minor, guest_minor); + GGML_LOG_WARN("Host minor (%d) and guest minor (%d) version differ\n", host_minor, guest_minor); } return 0; @@ -97,7 +97,7 @@ static 
ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); if (!encoder) { - GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder :/", __func__); + GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } @@ -108,14 +108,14 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { log_call_duration(call_duration_ns, "API Remoting LoadLibrary"); if (!decoder) { - GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall. :/", __func__); + GGML_ABORT("%s: hypercall error: failed to kick the API remoting hypercall.\n", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; } remote_call_finish(gpu, encoder, decoder); if (ret == APIR_LOAD_LIBRARY_SUCCESS) { - GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized", __func__); + GGML_LOG_INFO("%s: The API Remoting backend was successfully loaded and initialized\n", __func__); return ret; } @@ -133,11 +133,11 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { ApirLoadLibraryReturnCode apir_ret = (ApirLoadLibraryReturnCode) (ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX); if (apir_ret < APIR_LOAD_LIBRARY_INIT_BASE_INDEX) { - GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s):/", + GGML_ABORT("%s: the API Remoting backend library couldn't load the backend library: apir code=%d | %s)", __func__, apir_ret, apir_load_library_error(apir_ret)); } else { uint32_t lib_ret = apir_ret - APIR_LOAD_LIBRARY_INIT_BASE_INDEX; - GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d):/", __func__, + GGML_ABORT("%s: the API Remoting backend library initialize its backend library: apir code=%d)", __func__, lib_ret); } return ret; @@ -147,41 +147,40 @@ virtgpu * 
create_virtgpu() { virtgpu * gpu = new virtgpu(); gpu->use_apir_capset = getenv("GGML_REMOTING_USE_APIR_CAPSET") != nullptr; - util_sparse_array_init(&gpu->shmem_array, sizeof(virtgpu_shmem), 1024); if (virtgpu_open(gpu) != APIR_SUCCESS) { - GGML_ABORT("%s: failed to open the virtgpu device :/", __func__); + GGML_ABORT("%s: failed to open the virtgpu device", __func__); return NULL; } if (virtgpu_init_capset(gpu) != APIR_SUCCESS) { - GGML_ABORT("%s: failed to initialize the GPU capset :/", __func__); + GGML_ABORT("%s: failed to initialize the GPU capset", __func__); return NULL; } if (virtgpu_init_context(gpu) != APIR_SUCCESS) { - GGML_ABORT("%s: failed to initialize the GPU context :/", __func__); + GGML_ABORT("%s: failed to initialize the GPU context", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_REPLY_SIZE, &gpu->reply_shmem)) { - GGML_ABORT("%s: failed to create the shared reply memory pages :/", __func__); + GGML_ABORT("%s: failed to create the shared reply memory pages", __func__); return NULL; } if (virtgpu_shmem_create(gpu, SHMEM_DATA_SIZE, &gpu->data_shmem)) { - GGML_ABORT("%s: failed to create the shared data memory pages :/", __func__); + GGML_ABORT("%s: failed to create the shared data memory pages", __func__); return NULL; } if (virtgpu_handshake(gpu)) { - GGML_ABORT("%s: failed to handshake with the virglrenderer library :/", __func__); + GGML_ABORT("%s: failed to handshake with the virglrenderer library", __func__); return NULL; } if (virtgpu_load_library(gpu) != APIR_LOAD_LIBRARY_SUCCESS) { - GGML_ABORT("%s: failed to load the backend library :/", __func__); + GGML_ABORT("%s: failed to load the backend library", __func__); return NULL; } @@ -192,7 +191,7 @@ static virt_gpu_result_t virtgpu_open(virtgpu * gpu) { drmDevicePtr devs[8]; int count = drmGetDevices2(0, devs, ARRAY_SIZE(devs)); if (count < 0) { - GGML_LOG_ERROR("%s: failed to enumerate DRM devices", __func__); + GGML_LOG_ERROR("%s: failed to enumerate DRM devices\n", 
__func__); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -237,7 +236,7 @@ static virt_gpu_result_t virtgpu_open_device(virtgpu * gpu, const drmDevicePtr d drmFreeVersion(version); - GGML_LOG_INFO("using DRM device %s", node_path); + GGML_LOG_INFO("using DRM device %s\n", node_path); return APIR_SUCCESS; } @@ -246,7 +245,7 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) { assert(!gpu->capset.version); const int ret = virtgpu_ioctl_context_init(gpu, gpu->capset.id); if (ret) { - GGML_LOG_INFO("failed to initialize context: %s", strerror(errno)); + GGML_LOG_INFO("failed to initialize context: %s\n", strerror(errno)); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -255,10 +254,10 @@ static virt_gpu_result_t virtgpu_init_context(virtgpu * gpu) { static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) { if (gpu->use_apir_capset) { - GGML_LOG_INFO("Using the APIR capset"); + GGML_LOG_INFO("Using the APIR capset\n"); gpu->capset.id = VIRTGPU_DRM_CAPSET_APIR; } else { - GGML_LOG_INFO("Using the Venus capset"); + GGML_LOG_INFO("Using the Venus capset\n"); gpu->capset.id = VIRTGPU_DRM_CAPSET_VENUS; } gpu->capset.version = 0; @@ -267,7 +266,7 @@ static virt_gpu_result_t virtgpu_init_capset(virtgpu * gpu) { virtgpu_ioctl_get_caps(gpu, gpu->capset.id, gpu->capset.version, &gpu->capset.data, sizeof(gpu->capset.data)); if (ret) { - GGML_LOG_INFO("failed to get APIR v%d capset: %s", gpu->capset.version, strerror(errno)); + GGML_LOG_INFO("failed to get APIR v%d capset: %s\n", gpu->capset.version, strerror(errno)); return APIR_ERROR_INITIALIZATION_FAILED; } @@ -370,19 +369,19 @@ void remote_call_finish(virtgpu * gpu, apir_encoder * enc, apir_decoder * dec) { UNUSED(gpu); if (!enc) { - GGML_LOG_ERROR("Invalid (null) encoder :/"); + GGML_LOG_ERROR("Invalid (null) encoder\n"); } if (!dec) { - GGML_LOG_ERROR("Invalid (null) decoder :/"); + GGML_LOG_ERROR("Invalid (null) decoder\n"); } if (apir_encoder_get_fatal(enc)) { - GGML_LOG_ERROR("Failed to encode the output 
parameters."); + GGML_LOG_ERROR("Failed to encode the output parameters.\n"); } if (apir_decoder_get_fatal(dec)) { - GGML_LOG_ERROR("Failed to decode the input parameters."); + GGML_LOG_ERROR("Failed to decode the input parameters.\n"); } } @@ -468,7 +467,7 @@ uint32_t remote_call(virtgpu * gpu, } if (max_wait_ms && timedout) { - GGML_LOG_ERROR("timed out waiting for the host answer..."); + GGML_LOG_ERROR("timed out waiting for the host answer...\n"); return APIR_FORWARD_TIMEOUT; } @@ -490,10 +489,10 @@ static void log_call_duration(long long call_duration_ns, const char * name) { double call_duration_s = (double) call_duration_ns / 1e9; // 1 second = 1e9 nanoseconds if (call_duration_s > 1) { - GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...", __func__, call_duration_s, name); + GGML_LOG_INFO("%s: waited %.2fs for the %s host reply...\n", __func__, call_duration_s, name); } else if (call_duration_ms > 1) { - GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...", __func__, call_duration_ms, name); + GGML_LOG_INFO("%s: waited %.2fms for the %s host reply...\n", __func__, call_duration_ms, name); } else { - GGML_LOG_INFO("%s: waited %lldns for the %s host reply...", __func__, call_duration_ns, name); + GGML_LOG_INFO("%s: waited %lldns for the %s host reply...\n", __func__, call_duration_ns, name); } } From d160c793375a5f39c2b4716b15bcbb024248b617 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:07:13 +0100 Subject: [PATCH 15/37] Cleanup the CMakeLists --- ggml/src/ggml-remotingbackend/CMakeLists.txt | 3 +-- ggml/src/ggml-remotingfrontend/CMakeLists.txt | 9 ++------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/CMakeLists.txt b/ggml/src/ggml-remotingbackend/CMakeLists.txt index d01ec8b1b0f..9989f3e8c93 100644 --- a/ggml/src/ggml-remotingbackend/CMakeLists.txt +++ b/ggml/src/ggml-remotingbackend/CMakeLists.txt @@ -13,7 +13,6 @@ ggml_add_backend_library(ggml-remotingbackend 
shared/api_remoting.h shared/apir_backend.h shared/apir_cs.h - apir_cs_ggml-rpc-back.cpp - ) + apir_cs_ggml-rpc-back.cpp) target_compile_options(ggml-remotingbackend PRIVATE -std=c++20) diff --git a/ggml/src/ggml-remotingfrontend/CMakeLists.txt b/ggml/src/ggml-remotingfrontend/CMakeLists.txt index a4a7b17d6ce..f347fa0f6ea 100644 --- a/ggml/src/ggml-remotingfrontend/CMakeLists.txt +++ b/ggml/src/ggml-remotingfrontend/CMakeLists.txt @@ -23,14 +23,9 @@ ggml_add_backend_library(ggml-remotingfrontend virtgpu-forward-backend.cpp virtgpu-forward-impl.h apir_cs_ggml-rpc-front.cpp - ../../include/ggml-remoting-frontend.h - ) + ../../include/ggml-remoting-frontend.h) - # Debug: Show what pkg-config found - message(STATUS "DRM_INCLUDE_DIRS: ${DRM_INCLUDE_DIRS}") - message(STATUS "DRM_LIBRARIES: ${DRM_LIBRARIES}") - message(STATUS "DRM_CFLAGS_OTHER: ${DRM_CFLAGS_OTHER}") - target_include_directories(ggml-remotingfrontend PUBLIC /usr/include/libdrm/) +target_include_directories(ggml-remotingfrontend PUBLIC /usr/include/libdrm/) target_link_libraries(ggml-remotingfrontend PUBLIC ${DRM_LIBRARIES}) target_include_directories(ggml-remotingfrontend PUBLIC ${DRM_INCLUDE_DIRS}) From e9a469bee90bcac64585221175c4fc01806d12cb Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:07:28 +0100 Subject: [PATCH 16/37] Use uint64_t instead of long long --- ggml/src/ggml-remotingbackend/backend-dispatched.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index 17b988f3b41..08cb5193f28 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -11,9 +11,9 @@ ggml_backend_reg_t reg = NULL; ggml_backend_dev_t dev = NULL; ggml_backend_t bck = NULL; -long long timer_start = 0; -long long timer_total = 0; -long long timer_count = 0; +uint64_t timer_start = 0; +uint64_t timer_total = 0; 
+uint64_t timer_count = 0; uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { if (reg != NULL) { From ab4d5cccf16b52a9ef33f732cd24f2c237de9fdd Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:07:53 +0100 Subject: [PATCH 17/37] use (full) upper case for constants --- .../src/ggml-remotingbackend/shared/api_remoting.h | 14 +++++++------- .../ggml-remotingfrontend/virtgpu-forward-impl.h | 2 +- ggml/src/ggml-remotingfrontend/virtgpu.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h b/ggml/src/ggml-remotingbackend/shared/api_remoting.h index 4c9109b17f9..f19a5d12d17 100644 --- a/ggml/src/ggml-remotingbackend/shared/api_remoting.h +++ b/ggml/src/ggml-remotingbackend/shared/api_remoting.h @@ -12,9 +12,9 @@ #define APIR_HANDSHAKE_MAGIC 0xab1e enum ApirCommandType { - APIR_COMMAND_TYPE_HandShake = 0, - APIR_COMMAND_TYPE_LoadLibrary = 1, - APIR_COMMAND_TYPE_Forward = 2, + APIR_COMMAND_TYPE_HANDSHAKE = 0, + APIR_COMMAND_TYPE_LOADLIBRARY = 1, + APIR_COMMAND_TYPE_FORWARD = 2, APIR_COMMAND_TYPE_LENGTH = 3, }; @@ -41,11 +41,11 @@ enum ApirForwardReturnCode { __attribute__((unused)) static inline const char * apir_command_name(ApirCommandType type) { switch (type) { - case APIR_COMMAND_TYPE_HandShake: + case APIR_COMMAND_TYPE_HANDSHAKE: return "HandShake"; - case APIR_COMMAND_TYPE_LoadLibrary: + case APIR_COMMAND_TYPE_LOADLIBRARY: return "LoadLibrary"; - case APIR_COMMAND_TYPE_Forward: + case APIR_COMMAND_TYPE_FORWARD: return "Forward"; default: return "unknown"; @@ -84,7 +84,7 @@ __attribute__((unused)) static const char * apir_forward_error(ApirForwardReturn APIR_FORWARD_ERROR(APIR_FORWARD_TIMEOUT); APIR_FORWARD_ERROR(APIR_FORWARD_BASE_INDEX); - return "Unknown APIR_COMMAND_TYPE_Forward error"; + return "Unknown APIR_COMMAND_TYPE_FORWARD error"; #undef APIR_FORWARD_ERROR } diff --git 
a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h index c4e7bc5bd2d..72e666e36a1 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h @@ -9,7 +9,7 @@ #define REMOTE_CALL_PREPARE(gpu_dev_name, encoder_name, apir_command_type__) \ do { \ int32_t forward_flag = (int32_t) apir_command_type__; \ - encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_Forward, forward_flag); \ + encoder_name = remote_call_prepare(gpu_dev_name, APIR_COMMAND_TYPE_FORWARD, forward_flag); \ if (!encoder_name) { \ GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); \ } \ diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-remotingfrontend/virtgpu.cpp index ca04aa09e0e..005c8e21db8 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu.cpp @@ -31,7 +31,7 @@ static int virtgpu_handshake(virtgpu * gpu) { apir_encoder * encoder; apir_decoder * decoder; - encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HandShake, 0); + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_HANDSHAKE, 0); if (!encoder) { GGML_ABORT("%s: failed to prepare the remote call encoder", __func__); return 1; @@ -95,7 +95,7 @@ static ApirLoadLibraryReturnCode virtgpu_load_library(virtgpu * gpu) { apir_decoder * decoder; ApirLoadLibraryReturnCode ret; - encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LoadLibrary, 0); + encoder = remote_call_prepare(gpu, APIR_COMMAND_TYPE_LOADLIBRARY, 0); if (!encoder) { GGML_ABORT("%s: hypercall error: failed to prepare the remote call encoder", __func__); return APIR_LOAD_LIBRARY_HYPERCALL_INITIALIZATION_ERROR; From b522bfe8b21b7fb75623f7f16422f3e4f9d37629 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:23:36 +0100 Subject: [PATCH 18/37] ggml-remoting-frontend.cpp: remove unused file --- .../ggml-remoting-frontend.cpp | 22 
------------------- 1 file changed, 22 deletions(-) delete mode 100644 ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp b/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp deleted file mode 100644 index 4ac7f9c2821..00000000000 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting-frontend.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "ggml-remoting-frontend.h" - -#include "ggml-backend-impl.h" -#include "ggml-impl.h" -#include "remoting.h" - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -int ggml_backend_remoting_get_device_count(); - -struct remoting_device_struct { - std::mutex mutex; -}; From 18ef30dded6e06c881907b244b2a775dbed1d5b4 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:23:52 +0100 Subject: [PATCH 19/37] regenerate_remoting: remove unnecessary import --- ggml/src/ggml-remotingfrontend/regenerate_remoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py index 77180661231..4cc7d7568c7 100755 --- a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py +++ b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py @@ -18,7 +18,7 @@ """ import yaml -from typing import Dict, List, Any, Tuple +from typing import Dict, List, Any from pathlib import Path import os import subprocess From e0bb437665a680adf6dd34bcf0e4d6bc28ba999d Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 13 Jan 2026 14:37:13 +0100 Subject: [PATCH 20/37] regenerate_remoting: appease the linter --- .../regenerate_remoting.py | 51 ++++++++++--------- .../src/ggml-remotingfrontend/virtgpu-shm.cpp | 2 + 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py index 4cc7d7568c7..a402c5c565f 100755 
--- a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py +++ b/ggml/src/ggml-remotingfrontend/regenerate_remoting.py @@ -23,9 +23,11 @@ import os import subprocess import shutil +import logging NL = '\n' # can't have f"{'\n'}" in f-strings + class RemotingCodebaseGenerator: def __init__(self, yaml_path: str = "ggmlremoting_functions.yaml"): """Initialize the generator with the YAML configuration.""" @@ -62,10 +64,10 @@ def _format_file_with_clang_format(self, file_path: Path) -> bool: ) return True except subprocess.CalledProcessError as e: - print(f" ⚠️ Warning: clang-format failed for {file_path}: {e}") + logging.exception(f" ⚠️ Warning: clang-format failed for {file_path}", e) return False except Exception as e: - print(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}") + logging.exception(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}", e) return False def generate_enum_name(self, group_name: str, function_name: str) -> str: @@ -139,7 +141,7 @@ def generate_apir_backend_header(self) -> str: # Add the count total_count = len(functions) - enum_lines.append(f"\n // last command_type index + 1") + enum_lines.append("\n // last command_type index + 1") enum_lines.append(f" APIR_BACKEND_DISPATCH_TABLE_COUNT = {total_count},") enum_lines.append("} ApirBackendCommandType;") @@ -186,9 +188,7 @@ def generate_backend_dispatched_header(self) -> str: table_lines.append("") current_group = func['group_name'] - table_lines.append(f" /* {func['enum_name']} = */ {func['backend_function']},") - total_count = len(functions) header_content = f'''\ #pragma once @@ -240,8 +240,8 @@ def generate_virtgpu_forward_header(self) -> str: def regenerate_codebase(self) -> None: """Regenerate the entire remoting codebase.""" - print("🔄 Regenerating GGML Remoting Codebase...") - print("=" * 50) + logging.info("🔄 Regenerating GGML Remoting Codebase...") + logging.info("=" * 50) # Detect if we're running from frontend directory current_dir = os.getcwd() @@ -249,12 
+249,12 @@ def regenerate_codebase(self) -> None: if is_frontend_dir: # Running from ggml/src/ggml-remotingfrontend - print("📍 Detected frontend directory execution") + logging.info("📍 Detected frontend directory execution") backend_base = Path("../ggml-remotingbackend") frontend_base = Path(".") else: # Running from project root (fallback to original behavior) - print("📍 Detected project root execution") + logging.info("📍 Detected project root execution") base_path = self.config_data.get('base_path', 'ggml/src') backend_base = Path(base_path) / "ggml-remotingbackend" frontend_base = Path(base_path) / "ggml-remotingfrontend" @@ -270,51 +270,52 @@ def regenerate_codebase(self) -> None: virtgpu_forward_path.parent.mkdir(parents=True, exist_ok=True) # Generate header files - print("📁 Generating header files...") + logging.info("📁 Generating header files...") apir_backend_content = self.generate_apir_backend_header() apir_backend_path.write_text(apir_backend_content) - print(f" ✅ {apir_backend_path.resolve()}") + logging.info(f" ✅ {apir_backend_path.resolve()}") backend_dispatched_content = self.generate_backend_dispatched_header() backend_dispatched_path.write_text(backend_dispatched_content) - print(f" ✅ {backend_dispatched_path.resolve()}") + logging.info(f" ✅ {backend_dispatched_path.resolve()}") virtgpu_forward_content = self.generate_virtgpu_forward_header() virtgpu_forward_path.write_text(virtgpu_forward_content) - print(f" ✅ {virtgpu_forward_path.resolve()}") + logging.info(f" ✅ {virtgpu_forward_path.resolve()}") # Format generated files with clang-format generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path] if not self.clang_format_available: - print("\n⚠️ Warning: clang-format not found in PATH. Generated files will not be formatted.") - print(" Install clang-format to enable automatic code formatting.") + logging.warn("\n⚠️ Warning: clang-format not found in PATH. Generated files will not be formatted." 
+ " Install clang-format to enable automatic code formatting.") else: - print("\n🎨 Formatting files with clang-format...") + logging.info("\n🎨 Formatting files with clang-format...") for file_path in generated_files: if self._format_file_with_clang_format(file_path): - print(f" ✅ Formatted {file_path.name}") + logging.info(f" ✅ Formatted {file_path.name}") else: - print(f" ❌ Failed to format {file_path.name}") + logging.warn(f" ❌ Failed to format {file_path.name}") # Generate summary functions = self.get_enabled_functions() total_functions = len(functions) - print("\n📊 Generation Summary:") - print("=" * 50) - print(f" Total functions: {total_functions}") - print(f" Function groups: {len(self.functions)}") - print(f" Header files: 3") - print(f" Working directory: {current_dir}") + logging.info("\n📊 Generation Summary:") + logging.info("=" * 50) + logging.info(f" Total functions: {total_functions}") + logging.info(f" Function groups: {len(self.functions)}") + logging.info(" Header files: 3") + logging.info(f" Working directory: {current_dir}") + def main(): try: generator = RemotingCodebaseGenerator() generator.regenerate_codebase() except Exception as e: - print(f"❌ Error: {e}") + logging.exception(f"❌ Error:", e) exit(1) if __name__ == "__main__": diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp index 28ef1dd4595..4def405a62b 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp +++ b/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp @@ -85,6 +85,8 @@ int virtgpu_shmem_create(virtgpu * gpu, size_t size, virtgpu_shmem * shmem) { void * ptr = virtgpu_ioctl_map(gpu, gem_handle, size); if (!ptr) { virtgpu_ioctl_gem_close(gpu, gem_handle); + GGML_LOG_ERROR("virtgpu_ioctl_map FAILED\n"); + exit(1); return 1; } From ba48cfbe9e6ea5204dad004c23b1a6226f4ad858 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 14 Jan 2026 13:02:26 +0100 Subject: [PATCH 21/37] backend.cpp: use the right variable in error 
message --- ggml/src/ggml-remotingbackend/backend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index d94c24c2a20..91a621a9c8c 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -98,7 +98,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { } if (!library_init) { - GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined\n", library_init); + GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV); return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; } From 9182516bac6b2b2342a982cbbb7c8e4b7ef3840a Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 14 Jan 2026 13:02:37 +0100 Subject: [PATCH 22/37] ggml-backend-reg: fix typo --- ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index 2bf8de770cf..dc5eee4415f 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -123,7 +123,7 @@ ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { ggml_backend_remoting_reg_init_devices(®); - GGML_LOG_INFO("%s: initialzed\n", __func__); + GGML_LOG_INFO("%s: initialized\n", __func__); return ® } From 7ec38db8bf4af47bd2b736887ed471f01bf5d57b Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 16 Jan 2026 10:13:40 +0100 Subject: [PATCH 23/37] ggml_backend_remoting_buffer_type_get_alloc_size: validate that the buffer is supported --- ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp | 6 ++++++ ggml/src/ggml-remotingfrontend/ggml-remoting.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp 
b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp index ef6850570a0..4dc960fa5b0 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp @@ -70,6 +70,12 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff const ggml_tensor * tensor) { virtgpu * gpu = BUFT_TO_GPU(buft); + if (tensor->buffer == NULL + || !tensor->buffer->context + || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) { + return ggml_nbytes(tensor); + } + return apir_buffer_type_get_alloc_size(gpu, buft, tensor); } diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-remotingfrontend/ggml-remoting.h index 66cad84e5fe..0127020b3b4 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting.h +++ b/ggml/src/ggml-remotingfrontend/ggml-remoting.h @@ -62,5 +62,8 @@ static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggm } static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { + if (!buffer->context) { + GGML_ABORT("%s: no context available :/", __func__); + } return BUFFER_TO_HOST_HANDLE(buffer); } From f1ec1be378c6117d0e41fea7db0a276e7f8d6d6d Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Fri, 16 Jan 2026 10:13:58 +0100 Subject: [PATCH 24/37] ggml-backend-reg.cpp: define the GGML_BACKEND_DL_IMPL --- ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp index dc5eee4415f..0593cacd41c 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp @@ -127,3 +127,5 @@ ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { return &reg; } + +GGML_BACKEND_DL_IMPL(ggml_backend_remoting_frontend_reg) From 119bdec6caef3cebe486ab6f474e890fb3f726b4 Mon Sep 17 00:00:00
2001 From: Kevin Pouget Date: Fri, 16 Jan 2026 15:37:13 +0100 Subject: [PATCH 25/37] Update to allow dynamic configuration from the hypervisor Also cleanup the apir<>ggml-remotingbackend interface --- .../backend-dispatched-backend.cpp | 2 +- .../backend-dispatched-buffer.cpp | 4 +- .../backend-dispatched-device.cpp | 2 +- .../ggml-remotingbackend/backend-dispatched.h | 6 +++ .../ggml-remotingbackend/backend-virgl-apir.h | 26 ++++++---- ggml/src/ggml-remotingbackend/backend.cpp | 47 ++++++++++++------- .../shared/apir_backend.h | 12 ----- 7 files changed, 56 insertions(+), 43 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp index 595d60c6f78..77b4ee71e12 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp @@ -25,7 +25,7 @@ uint32_t backend_backend_graph_compute(apir_encoder * enc, apir_decoder * dec, v uint32_t shmem_res_id; apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - const void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + const void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); apir_decoder_set_fatal(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp index 87dc74294cc..cf81888e989 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp @@ -37,7 +37,7 @@ uint32_t backend_buffer_set_tensor(apir_encoder * enc, apir_decoder * dec, virgl size_t size; apir_decode_size_t(dec, &size); - void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { 
GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); @@ -69,7 +69,7 @@ uint32_t backend_buffer_get_tensor(apir_encoder * enc, apir_decoder * dec, virgl size_t size; apir_decode_size_t(dec, &size); - void * shmem_data = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_data = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_data) { GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); return 1; diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp index e91f3f98fef..497f737a881 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp @@ -122,7 +122,7 @@ uint32_t backend_device_buffer_from_ptr(apir_encoder * enc, apir_decoder * dec, uint32_t shmem_res_id; apir_decode_virtgpu_shmem_res_id(dec, &shmem_res_id); - void * shmem_ptr = ctx->iface.get_shmem_ptr(ctx->virgl_ctx, shmem_res_id); + void * shmem_ptr = ctx->iface->get_shmem_ptr(ctx->ctx_id, shmem_res_id); if (!shmem_ptr) { GGML_LOG_ERROR("Couldn't get the shmem addr from virgl\n"); apir_decoder_set_fatal(dec); diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index bc2330337f0..618b8837f6c 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -6,10 +6,16 @@ #include #include "backend-convert.h" +#include "backend-virgl-apir.h" #include "shared/apir_backend.h" #include "shared/apir_cs.h" #include "shared/apir_cs_ggml.h" +struct virgl_apir_context { + uint32_t ctx_id; + virgl_apir_callbacks * iface; +}; + typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, apir_decoder * dec, virgl_apir_context * ctx); #include "backend-dispatched.gen.h" diff --git a/ggml/src/ggml-remotingbackend/backend-virgl-apir.h b/ggml/src/ggml-remotingbackend/backend-virgl-apir.h index 
56ce9ea3a86..1972435a9f0 100644 --- a/ggml/src/ggml-remotingbackend/backend-virgl-apir.h +++ b/ggml/src/ggml-remotingbackend/backend-virgl-apir.h @@ -1,3 +1,5 @@ +#pragma once + #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" @@ -11,14 +13,20 @@ extern ggml_backend_reg_t reg; extern ggml_backend_dev_t dev; extern ggml_backend_t bck; +struct virgl_apir_callbacks { + const char * (*get_config)(uint32_t virgl_ctx_id, const char * key); + void * (*get_shmem_ptr)(uint32_t virgl_ctx_id, uint32_t res_id); +}; + extern "C" { -ApirLoadLibraryReturnCode apir_backend_initialize(); -void apir_backend_deinit(void); -uint32_t apir_backend_dispatcher(uint32_t cmd_type, - virgl_apir_context * ctx, - char * dec_cur, - const char * dec_end, - char * enc_cur, - const char * enc_end, - char ** enc_cur_after); +ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs); +void apir_backend_deinit(uint32_t virgl_ctx_id); +uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, + virgl_apir_callbacks * virgl_cbs, + uint32_t cmd_type, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after); } diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-remotingbackend/backend.cpp index 91a621a9c8c..dd46a5a273c 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -25,7 +25,9 @@ static void log_to_file_callback(enum ggml_log_level level, const char * text, v } extern "C" { -void apir_backend_deinit(void) { +void apir_backend_deinit(uint32_t virgl_ctx_id) { + GGML_UNUSED(virgl_ctx_id); + auto buffers = apir_get_track_backend_buffers(); for (const auto & buffer : buffers) { apir_untrack_backend_buffer(buffer); @@ -50,7 +52,11 @@ void apir_backend_deinit(void) { } } -ApirLoadLibraryReturnCode apir_backend_initialize() { +#define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path" +#define 
APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg" +#define APIR_GGML_LIBRARY_INIT_KEY "ggml.library.init" + +ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) { const char * dlsym_error; const char * apir_log_to_file = getenv(APIR_LLAMA_CPP_LOG_TO_FILE_ENV); @@ -62,9 +68,10 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { GGML_LOG_INFO("Could not open the log file at '%s'\n", apir_log_to_file); } } - const char * library_name = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); - const char * library_reg = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV); - const char * library_init = getenv(APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV); + + const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY); + const char * library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY); + const char * library_init = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_INIT_KEY); GGML_LOG_INFO("%s: loading %s (%s|%s)\n", __func__, library_name, library_reg, library_init); @@ -117,27 +124,31 @@ ApirLoadLibraryReturnCode apir_backend_initialize() { return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret); } -uint32_t apir_backend_dispatcher(uint32_t cmd_type, - virgl_apir_context * ctx, - char * dec_cur, - const char * dec_end, - char * enc_cur, - const char * enc_end, - char ** enc_cur_after) { - apir_encoder _enc = { +uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, + virgl_apir_callbacks * virgl_cbs, + uint32_t cmd_type, + char * dec_cur, + const char * dec_end, + char * enc_cur, + const char * enc_end, + char ** enc_cur_after) { + apir_encoder enc = { .cur = enc_cur, .start = enc_cur, .end = enc_end, .fatal = false, }; - apir_encoder * enc = &_enc; - apir_decoder _dec = { + apir_decoder dec = { .cur = dec_cur, .end = dec_end, .fatal = false, }; - apir_decoder * dec = &_dec; + + virgl_apir_context ctx = { + .ctx_id = virgl_ctx_id, + .iface = virgl_cbs, 
+ }; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { GGML_LOG_ERROR("Received an invalid dispatch index (%d >= %d)\n", cmd_type, APIR_BACKEND_DISPATCH_TABLE_COUNT); @@ -145,9 +156,9 @@ uint32_t apir_backend_dispatcher(uint32_t cmd_type, } backend_dispatch_t forward_fct = apir_backend_dispatch_table[cmd_type]; - uint32_t ret = forward_fct(enc, dec, ctx); + uint32_t ret = forward_fct(&enc, &dec, &ctx); - *enc_cur_after = enc->cur; + *enc_cur_after = enc.cur; return ret; } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h index 094f004c630..977fb9a66fe 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -24,18 +24,6 @@ typedef uintptr_t apir_buffer_type_host_handle_t; typedef uintptr_t apir_buffer_host_handle_t; -struct virgl_opaque_context; - -struct virgl_apir_callbacks { - void * (*get_shmem_ptr)(virgl_opaque_context * ctx, uint32_t res_id); -}; - -struct virgl_apir_context { - virgl_opaque_context * virgl_ctx; - - virgl_apir_callbacks iface; -}; - static const char * apir_backend_initialize_error(int code) { #define APIR_BACKEND_INITIALIZE_ERROR(code_name) \ do { \ From 8ff55229da3b0ce4e9120fee1c2815c566d80a0e Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 20 Jan 2026 14:08:39 +0100 Subject: [PATCH 26/37] remotingbackend: Simplify the initialization process --- .../backend-dispatched.cpp | 19 +++++++------ .../ggml-remotingbackend/backend-dispatched.h | 2 +- ggml/src/ggml-remotingbackend/backend.cpp | 27 ++++--------------- .../shared/apir_backend.h | 2 ++ 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp index 08cb5193f28..3161e91d222 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.cpp @@ -15,7 +15,7 @@ uint64_t 
timer_start = 0; uint64_t timer_total = 0; uint64_t timer_count = 0; -uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p) { +uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) { if (reg != NULL) { GGML_LOG_WARN("%s: already initialized\n", __func__); return APIR_BACKEND_INITIALIZE_ALREADY_INITED; @@ -28,20 +28,19 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_ return APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED; } - if (reg->iface.get_device_count(reg)) { - dev = reg->iface.get_device(reg, 0); + if (!reg->iface.get_device_count(reg)) { + GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__); + return APIR_BACKEND_INITIALIZE_NO_DEVICE; } - ggml_backend_t (*ggml_backend_fct)(int) = (ggml_backend_t (*)(int)) ggml_backend_init_fct_p; + dev = reg->iface.get_device(reg, 0); - bck = ggml_backend_fct(0); - if (!bck) { - GGML_LOG_ERROR("%s: backend initialization failed\n", __func__); - return APIR_BACKEND_INITIALIZE_BACKEND_FAILED; + if (!dev) { + GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__); + return APIR_BACKEND_INITIALIZE_NO_DEVICE; } - size_t free, total; - dev->iface.get_memory(dev, &free, &total); + bck = dev->iface.init_backend(dev, NULL); return APIR_BACKEND_INITIALIZE_SUCCESS; } diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-remotingbackend/backend-dispatched.h index 618b8837f6c..6ccbecf078d 100644 --- a/ggml/src/ggml-remotingbackend/backend-dispatched.h +++ b/ggml/src/ggml-remotingbackend/backend-dispatched.h @@ -20,4 +20,4 @@ typedef uint32_t (*backend_dispatch_t)(apir_encoder * enc, apir_decoder * dec, v #include "backend-dispatched.gen.h" -uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p, void * ggml_backend_init_fct_p); +uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p); diff --git a/ggml/src/ggml-remotingbackend/backend.cpp 
b/ggml/src/ggml-remotingbackend/backend.cpp index dd46a5a273c..c270e9a5e43 100644 --- a/ggml/src/ggml-remotingbackend/backend.cpp +++ b/ggml/src/ggml-remotingbackend/backend.cpp @@ -12,9 +12,10 @@ #define APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_PATH" #define APIR_LLAMA_CPP_GGML_LIBRARY_REG_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_REG" -#define APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV "APIR_LLAMA_CPP_GGML_LIBRARY_INIT" #define APIR_LLAMA_CPP_LOG_TO_FILE_ENV "APIR_LLAMA_CPP_LOG_TO_FILE" +#define GGML_DEFAULT_BACKEND_REG "ggml_backend_init" + static void * backend_library_handle = NULL; static FILE * apir_logfile = NULL; @@ -54,7 +55,6 @@ void apir_backend_deinit(uint32_t virgl_ctx_id) { #define APIR_GGML_LIBRARY_PATH_KEY "ggml.library.path" #define APIR_GGML_LIBRARY_REG_KEY "ggml.library.reg" -#define APIR_GGML_LIBRARY_INIT_KEY "ggml.library.init" ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct virgl_apir_callbacks *virgl_cbs) { const char * dlsym_error; @@ -70,10 +70,8 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct } const char * library_name = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_PATH_KEY); - const char * library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY); - const char * library_init = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_INIT_KEY); - - GGML_LOG_INFO("%s: loading %s (%s|%s)\n", __func__, library_name, library_reg, library_init); + const char * virgl_library_reg = virgl_cbs->get_config(virgl_ctx_id, APIR_GGML_LIBRARY_REG_KEY); + const char * library_reg = virgl_library_reg ? 
virgl_library_reg : GGML_DEFAULT_BACKEND_REG; if (!library_name) { GGML_LOG_ERROR("cannot open the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_PATH_ENV); @@ -104,22 +102,7 @@ ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct return APIR_LOAD_LIBRARY_SYMBOL_MISSING; } - if (!library_init) { - GGML_LOG_ERROR("cannot initialize the GGML library: env var '%s' not defined\n", APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV); - - return APIR_LOAD_LIBRARY_ENV_VAR_MISSING; - } - - void * ggml_backend_init_fct = dlsym(backend_library_handle, library_init); - dlsym_error = dlerror(); - if (dlsym_error) { - GGML_LOG_ERROR("cannot find the GGML backend init symbol '%s' (from %s): %s\n", library_init, - APIR_LLAMA_CPP_GGML_LIBRARY_INIT_ENV, dlsym_error); - - return APIR_LOAD_LIBRARY_SYMBOL_MISSING; - } - - uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct, ggml_backend_init_fct); + uint32_t ret = backend_dispatch_initialize(ggml_backend_reg_fct); return (ApirLoadLibraryReturnCode) (APIR_LOAD_LIBRARY_INIT_BASE_INDEX + ret); } diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h b/ggml/src/ggml-remotingbackend/shared/apir_backend.h index 977fb9a66fe..f3efa52c721 100644 --- a/ggml/src/ggml-remotingbackend/shared/apir_backend.h +++ b/ggml/src/ggml-remotingbackend/shared/apir_backend.h @@ -13,6 +13,8 @@ #define APIR_BACKEND_INITIALIZE_BACKEND_FAILED 5 #define APIR_BACKEND_INITIALIZE_BACKEND_REG_FAILED 6 #define APIR_BACKEND_INITIALIZE_ALREADY_INITED 7 +#define APIR_BACKEND_INITIALIZE_NO_DEVICE 8 + // new entries here need to be added to the apir_backend_initialize_error function below From 2401f63bc6e3b01cbf64f643b5f82daa7c569502 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Tue, 20 Jan 2026 18:13:21 +0100 Subject: [PATCH 27/37] Rename the GGML backend --- CODEOWNERS | 1 + ggml/CMakeLists.txt | 6 +-- ...gml-remoting-frontend.h => ggml-virtgpu.h} | 2 +- ggml/src/CMakeLists.txt | 3 +- 
ggml/src/ggml-backend-reg.cpp | 10 ++-- ggml/src/ggml-remotingfrontend/CMakeLists.txt | 37 ------------- ggml/src/ggml-virtgpu/CMakeLists.txt | 52 +++++++++++++++++++ .../apir_cs_ggml-rpc-front.cpp | 2 +- .../backend}/CMakeLists.txt | 5 +- .../backend}/apir_cs_ggml-rpc-back.cpp | 0 .../backend}/backend-convert.h | 0 .../backend}/backend-dispatched-backend.cpp | 0 .../backend-dispatched-buffer-type.cpp | 0 .../backend}/backend-dispatched-buffer.cpp | 0 .../backend}/backend-dispatched-device.cpp | 0 .../backend}/backend-dispatched.cpp | 0 .../backend}/backend-dispatched.gen.h | 0 .../backend}/backend-dispatched.h | 0 .../backend}/backend-virgl-apir.h | 0 .../backend}/backend.cpp | 0 .../backend}/shared/api_remoting.h | 0 .../backend}/shared/apir_backend.gen.h | 0 .../backend}/shared/apir_backend.h | 0 .../backend}/shared/apir_cs.h | 0 .../backend}/shared/apir_cs_ggml.h | 0 .../backend}/shared/apir_cs_rpc.h | 0 .../ggml-backend-buffer-type.cpp | 0 .../ggml-backend-buffer.cpp | 0 .../ggml-backend-device.cpp | 0 .../ggml-backend-reg.cpp | 7 +-- .../ggml-backend.cpp | 3 +- .../ggml-remoting.h | 1 - .../ggmlremoting_functions.yaml | 2 +- .../include/apir_hw.h | 0 .../include/venus_hw.h | 0 .../regenerate_remoting.py | 6 +-- .../virtgpu-apir.h | 2 +- .../virtgpu-forward-backend.cpp | 0 .../virtgpu-forward-buffer-type.cpp | 0 .../virtgpu-forward-buffer.cpp | 0 .../virtgpu-forward-device.cpp | 0 .../virtgpu-forward-impl.h | 4 +- .../virtgpu-forward.gen.h | 0 .../virtgpu-shm.cpp | 0 .../virtgpu-shm.h | 0 .../virtgpu-utils.cpp | 0 .../virtgpu-utils.h | 0 .../virtgpu.cpp | 0 .../virtgpu.h | 4 +- 49 files changed, 83 insertions(+), 64 deletions(-) rename ggml/include/{ggml-remoting-frontend.h => ggml-virtgpu.h} (71%) delete mode 100644 ggml/src/ggml-remotingfrontend/CMakeLists.txt create mode 100644 ggml/src/ggml-virtgpu/CMakeLists.txt rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/apir_cs_ggml-rpc-front.cpp (98%) rename ggml/src/{ggml-remotingbackend => 
ggml-virtgpu/backend}/CMakeLists.txt (77%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/apir_cs_ggml-rpc-back.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-convert.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched-backend.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched-buffer-type.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched-buffer.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched-device.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched.gen.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-dispatched.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend-virgl-apir.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/backend.cpp (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/api_remoting.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/apir_backend.gen.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/apir_backend.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/apir_cs.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/apir_cs_ggml.h (100%) rename ggml/src/{ggml-remotingbackend => ggml-virtgpu/backend}/shared/apir_cs_rpc.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-backend-buffer-type.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-backend-buffer.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-backend-device.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-backend-reg.cpp (95%) rename 
ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-backend.cpp (97%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggml-remoting.h (98%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/ggmlremoting_functions.yaml (98%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/include/apir_hw.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/include/venus_hw.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/regenerate_remoting.py (98%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-apir.h (85%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward-backend.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward-buffer-type.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward-buffer.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward-device.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward-impl.h (94%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-forward.gen.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-shm.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-shm.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-utils.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu-utils.h (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu.cpp (100%) rename ggml/src/{ggml-remotingfrontend => ggml-virtgpu}/virtgpu.h (95%) diff --git a/CODEOWNERS b/CODEOWNERS index 750096d9a18..b25f2526393 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -66,6 +66,7 @@ /ggml/src/ggml-rpc/ @rgerganov /ggml/src/ggml-threading.* @ggerganov /ggml/src/ggml-vulkan/ @0cc4m +/ggml/src/ggml-virtgpu/ @kpouget /ggml/src/ggml-webgpu/ @reeselevine /ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM /ggml/src/ggml.c @ggerganov diff --git a/ggml/CMakeLists.txt 
b/ggml/CMakeLists.txt index e3776172ab1..b0b8e57898c 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -228,8 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU) option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF) option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON) option(GGML_ZDNN "ggml: use zDNN" OFF) -option(GGML_REMOTING_FRONTEND "ggml: use the API Remoting frontend" OFF) -option(GGML_REMOTING_BACKEND "ggml: use the API Remoting backend" OFF) +option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF) +option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) @@ -322,7 +322,7 @@ set(GGML_PUBLIC_HEADERS include/ggml-opt.h include/ggml-metal.h include/ggml-rpc.h - include/ggml-remoting-frontend.h + include/ggml-virtgpu.h include/ggml-sycl.h include/ggml-vulkan.h include/ggml-webgpu.h diff --git a/ggml/include/ggml-remoting-frontend.h b/ggml/include/ggml-virtgpu.h similarity index 71% rename from ggml/include/ggml-remoting-frontend.h rename to ggml/include/ggml-virtgpu.h index 4c7cd585ea4..1cb4bd7a038 100644 --- a/ggml/include/ggml-remoting-frontend.h +++ b/ggml/include/ggml-virtgpu.h @@ -9,7 +9,7 @@ extern "C" { #define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend" -GGML_BACKEND_API ggml_backend_reg_t ggml_backend_remoting_frontend_reg(); +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg(); #ifdef __cplusplus } diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index d2261c02a08..260ad48f0e8 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -451,8 +451,7 @@ ggml_add_backend(HIP) ggml_add_backend(METAL) ggml_add_backend(MUSA) ggml_add_backend(RPC) -ggml_add_backend(RemotingFrontend) 
-ggml_add_backend(RemotingBackend) +ggml_add_backend(VirtGPU) ggml_add_backend(SYCL) ggml_add_backend(Vulkan) ggml_add_backend(WebGPU) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 087a228ece2..e932c6167c9 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -69,8 +69,8 @@ #include "ggml-rpc.h" #endif -#ifdef GGML_USE_REMOTINGFRONTEND -#include "ggml-remoting-frontend.h" +#ifdef GGML_USE_VIRTGPU_FRONTEND +#include "ggml-virtgpu.h" #endif #ifdef GGML_USE_CANN @@ -213,8 +213,8 @@ struct ggml_backend_registry { #ifdef GGML_USE_ZDNN register_backend(ggml_backend_zdnn_reg()); #endif -#ifdef GGML_USE_REMOTINGFRONTEND - register_backend(ggml_backend_remoting_frontend_reg()); +#ifdef GGML_USE_VIRTGPU_FRONTEND + register_backend(ggml_backend_virtgpu_reg()); #endif #ifdef GGML_USE_OPENCL @@ -633,7 +633,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load_best("rpc", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path); ggml_backend_load_best("vulkan", silent, dir_path); - ggml_backend_load_best("remotingfrontend", silent, dir_path); + ggml_backend_load_best("virtgpu", silent, dir_path); ggml_backend_load_best("opencl", silent, dir_path); ggml_backend_load_best("hexagon", silent, dir_path); ggml_backend_load_best("musa", silent, dir_path); diff --git a/ggml/src/ggml-remotingfrontend/CMakeLists.txt b/ggml/src/ggml-remotingfrontend/CMakeLists.txt deleted file mode 100644 index f347fa0f6ea..00000000000 --- a/ggml/src/ggml-remotingfrontend/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -cmake_minimum_required(VERSION 3.19) -cmake_policy(SET CMP0114 NEW) - -message(STATUS "Enable API Remoting frontend") - -find_package(PkgConfig REQUIRED) -pkg_check_modules(DRM REQUIRED libdrm) - -ggml_add_backend_library(ggml-remotingfrontend - ggml-backend-buffer.cpp - ggml-backend.cpp - ggml-backend-device.cpp - ggml-backend-reg.cpp - ggml-backend-buffer-type.cpp - virtgpu-apir.h - 
virtgpu-forward.gen.h - virtgpu.cpp - virtgpu-shm.cpp - virtgpu-utils.cpp - virtgpu-forward-device.cpp - virtgpu-forward-buffer-type.cpp - virtgpu-forward-buffer.cpp - virtgpu-forward-backend.cpp - virtgpu-forward-impl.h - apir_cs_ggml-rpc-front.cpp - ../../include/ggml-remoting-frontend.h) - -target_include_directories(ggml-remotingfrontend PUBLIC /usr/include/libdrm/) - -target_link_libraries(ggml-remotingfrontend PUBLIC ${DRM_LIBRARIES}) -target_include_directories(ggml-remotingfrontend PUBLIC ${DRM_INCLUDE_DIRS}) -target_compile_options(ggml-remotingfrontend PUBLIC ${DRM_CFLAGS_OTHER}) - -target_include_directories(ggml-remotingfrontend PUBLIC ./include) -target_include_directories(ggml-remotingfrontend PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -target_compile_options(ggml-remotingfrontend PRIVATE -std=c++20) diff --git a/ggml/src/ggml-virtgpu/CMakeLists.txt b/ggml/src/ggml-virtgpu/CMakeLists.txt new file mode 100644 index 00000000000..5359832dfcb --- /dev/null +++ b/ggml/src/ggml-virtgpu/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.19) +cmake_policy(SET CMP0114 NEW) + +message(STATUS "Including the VirtGPU/Virglrenderer API Remoting") + +if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY") + message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend libraryf") + + find_package(PkgConfig REQUIRED) + pkg_check_modules(DRM REQUIRED libdrm) + if (NOT GGML_BACKEND_DL) + # cannot simply use USE_VIRTGPU, as in the 'else()' case the + # frontend isn't compiled + target_compile_definitions(ggml PUBLIC "GGML_USE_VIRTGPU_FRONTEND") + endif() + + ggml_add_backend_library(ggml-virtgpu + ggml-backend-buffer.cpp + ggml-backend.cpp + ggml-backend-device.cpp + ggml-backend-reg.cpp + ggml-backend-buffer-type.cpp + virtgpu-apir.h + virtgpu-forward.gen.h + virtgpu.cpp + virtgpu-shm.cpp + virtgpu-utils.cpp + virtgpu-forward-device.cpp + virtgpu-forward-buffer-type.cpp + virtgpu-forward-buffer.cpp + virtgpu-forward-backend.cpp + virtgpu-forward-impl.h + 
apir_cs_ggml-rpc-front.cpp + ../../include/ggml-virtgpu.h) + + target_include_directories(ggml-virtgpu PUBLIC /usr/include/libdrm/) + + target_link_libraries(ggml-virtgpu PUBLIC ${DRM_LIBRARIES}) + target_include_directories(ggml-virtgpu PUBLIC ${DRM_INCLUDE_DIRS}) + target_compile_options(ggml-virtgpu PUBLIC ${DRM_CFLAGS_OTHER}) + + target_include_directories(ggml-virtgpu PUBLIC ./include) + target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + + target_compile_options(ggml-virtgpu PRIVATE -std=c++20) +else() + message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library") +endif() + +if (NOT GGML_VIRTGPU_BACKEND STREQUAL "OFF") + add_subdirectory("backend") +endif() diff --git a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp b/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp similarity index 98% rename from ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp rename to ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp index 7def3d1a621..f60ae3556ca 100644 --- a/ggml/src/ggml-remotingfrontend/apir_cs_ggml-rpc-front.cpp +++ b/ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp @@ -1,4 +1,4 @@ -#include "../ggml-remotingbackend/shared/apir_cs_rpc.h" +#include "backend/shared/apir_cs_rpc.h" #include "ggml-backend-impl.h" #include "ggml-impl.h" #include "ggml-remoting.h" diff --git a/ggml/src/ggml-remotingbackend/CMakeLists.txt b/ggml/src/ggml-virtgpu/backend/CMakeLists.txt similarity index 77% rename from ggml/src/ggml-remotingbackend/CMakeLists.txt rename to ggml/src/ggml-virtgpu/backend/CMakeLists.txt index 9989f3e8c93..5234c555c9e 100644 --- a/ggml/src/ggml-remotingbackend/CMakeLists.txt +++ b/ggml/src/ggml-virtgpu/backend/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.19) cmake_policy(SET CMP0114 NEW) -message(STATUS "Enable API Remoting backend") +message(STATUS "Enable the VirtGPU/Virglrenderer backend library") ggml_add_backend_library(ggml-remotingbackend backend.cpp @@ -16,3 +16,6 
@@ ggml_add_backend_library(ggml-remotingbackend apir_cs_ggml-rpc-back.cpp) target_compile_options(ggml-remotingbackend PRIVATE -std=c++20) + +# Add include directory for ggml-backend-impl.h and other core headers +target_include_directories(ggml-remotingbackend PRIVATE ../..) diff --git a/ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp b/ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/apir_cs_ggml-rpc-back.cpp rename to ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-convert.h b/ggml/src/ggml-virtgpu/backend/backend-convert.h similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-convert.h rename to ggml/src/ggml-virtgpu/backend/backend-convert.h diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched-backend.cpp rename to ggml/src/ggml-virtgpu/backend/backend-dispatched-backend.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched-buffer-type.cpp rename to ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer-type.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched-buffer.cpp rename to ggml/src/ggml-virtgpu/backend/backend-dispatched-buffer.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched-device.cpp rename to 
ggml/src/ggml-virtgpu/backend/backend-dispatched-device.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched.cpp rename to ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.gen.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched.gen.h rename to ggml/src/ggml-virtgpu/backend/backend-dispatched.gen.h diff --git a/ggml/src/ggml-remotingbackend/backend-dispatched.h b/ggml/src/ggml-virtgpu/backend/backend-dispatched.h similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-dispatched.h rename to ggml/src/ggml-virtgpu/backend/backend-dispatched.h diff --git a/ggml/src/ggml-remotingbackend/backend-virgl-apir.h b/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h similarity index 100% rename from ggml/src/ggml-remotingbackend/backend-virgl-apir.h rename to ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h diff --git a/ggml/src/ggml-remotingbackend/backend.cpp b/ggml/src/ggml-virtgpu/backend/backend.cpp similarity index 100% rename from ggml/src/ggml-remotingbackend/backend.cpp rename to ggml/src/ggml-virtgpu/backend/backend.cpp diff --git a/ggml/src/ggml-remotingbackend/shared/api_remoting.h b/ggml/src/ggml-virtgpu/backend/shared/api_remoting.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/api_remoting.h rename to ggml/src/ggml-virtgpu/backend/shared/api_remoting.h diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/apir_backend.gen.h rename to ggml/src/ggml-virtgpu/backend/shared/apir_backend.gen.h diff --git a/ggml/src/ggml-remotingbackend/shared/apir_backend.h 
b/ggml/src/ggml-virtgpu/backend/shared/apir_backend.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/apir_backend.h rename to ggml/src/ggml-virtgpu/backend/shared/apir_backend.h diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/apir_cs.h rename to ggml/src/ggml-virtgpu/backend/shared/apir_cs.h diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/apir_cs_ggml.h rename to ggml/src/ggml-virtgpu/backend/shared/apir_cs_ggml.h diff --git a/ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h b/ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h similarity index 100% rename from ggml/src/ggml-remotingbackend/shared/apir_cs_rpc.h rename to ggml/src/ggml-virtgpu/backend/shared/apir_cs_rpc.h diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/ggml-backend-buffer-type.cpp rename to ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/ggml-backend-buffer.cpp rename to ggml/src/ggml-virtgpu/ggml-backend-buffer.cpp diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp b/ggml/src/ggml-virtgpu/ggml-backend-device.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/ggml-backend-device.cpp rename to ggml/src/ggml-virtgpu/ggml-backend-device.cpp diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp similarity index 95% rename from ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp rename to 
ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index 0593cacd41c..d54c2cbb39c 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -1,4 +1,5 @@ #include "ggml-remoting.h" +#include "ggml-virtgpu.h" #include #include @@ -102,10 +103,10 @@ static const ggml_backend_reg_i ggml_backend_remoting_reg_i = { /* .get_proc_address = */ NULL, }; -ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { +ggml_backend_reg_t ggml_backend_virtgpu_reg() { virtgpu * gpu = apir_initialize(); if (!gpu) { - GGML_LOG_ERROR("apir_initialize failed\n"); + GGML_LOG_ERROR("virtgpu_apir_initialize failed\n"); return NULL; } @@ -128,4 +129,4 @@ ggml_backend_reg_t ggml_backend_remoting_frontend_reg() { return ® } -GGML_BACKEND_DL_IMPL(ggml_backend_remoting_frontend_reg) +GGML_BACKEND_DL_IMPL(ggml_backend_virtgpu_reg) diff --git a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp b/ggml/src/ggml-virtgpu/ggml-backend.cpp similarity index 97% rename from ggml/src/ggml-remotingfrontend/ggml-backend.cpp rename to ggml/src/ggml-virtgpu/ggml-backend.cpp index a0f1f7ec792..5cd6c0c0608 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-backend.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend.cpp @@ -1,4 +1,5 @@ #include "ggml-remoting.h" +#include "../../include/ggml-virtgpu.h" static const char * ggml_backend_remoting_get_name(ggml_backend_t backend) { UNUSED(backend); @@ -60,7 +61,7 @@ ggml_backend_t ggml_backend_remoting_device_init(ggml_backend_dev_t dev, const c ggml_backend_t remoting_backend = new ggml_backend{ /* .guid = */ ggml_backend_remoting_guid(), /* .interface = */ ggml_backend_remoting_interface, - /* .device = */ ggml_backend_reg_dev_get(ggml_backend_remoting_frontend_reg(), ctx->device), + /* .device = */ ggml_backend_reg_dev_get(ggml_backend_virtgpu_reg(), ctx->device), /* .context = */ ctx, }; diff --git a/ggml/src/ggml-remotingfrontend/ggml-remoting.h b/ggml/src/ggml-virtgpu/ggml-remoting.h similarity index 98% rename from 
ggml/src/ggml-remotingfrontend/ggml-remoting.h rename to ggml/src/ggml-virtgpu/ggml-remoting.h index 0127020b3b4..135f1c52970 100644 --- a/ggml/src/ggml-remotingfrontend/ggml-remoting.h +++ b/ggml/src/ggml-virtgpu/ggml-remoting.h @@ -3,7 +3,6 @@ #include "ggml-backend-impl.h" #include "ggml-backend.h" #include "ggml-impl.h" -#include "ggml-remoting-frontend.h" #include "virtgpu.h" #include diff --git a/ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml similarity index 98% rename from ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml rename to ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index d45f0ebb178..fa7d57b0283 100644 --- a/ggml/src/ggml-remotingfrontend/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -10,7 +10,7 @@ config: files: apir_backend_header: "ggml-remotingbackend/shared/apir_backend.gen.h" backend_dispatched_header: "ggml-remotingbackend/backend-dispatched.gen.h" - virtgpu_forward_header: "ggml-remotingfrontend/virtgpu-forward.gen.h" + virtgpu_forward_header: "ggml-virtgpu-apir/virtgpu-forward.gen.h" # Simplified function definitions with grouping and metadata combined functions: diff --git a/ggml/src/ggml-remotingfrontend/include/apir_hw.h b/ggml/src/ggml-virtgpu/include/apir_hw.h similarity index 100% rename from ggml/src/ggml-remotingfrontend/include/apir_hw.h rename to ggml/src/ggml-virtgpu/include/apir_hw.h diff --git a/ggml/src/ggml-remotingfrontend/include/venus_hw.h b/ggml/src/ggml-virtgpu/include/venus_hw.h similarity index 100% rename from ggml/src/ggml-remotingfrontend/include/venus_hw.h rename to ggml/src/ggml-virtgpu/include/venus_hw.h diff --git a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py similarity index 98% rename from ggml/src/ggml-remotingfrontend/regenerate_remoting.py rename to ggml/src/ggml-virtgpu/regenerate_remoting.py index a402c5c565f..927c0edccb3 100755 
--- a/ggml/src/ggml-remotingfrontend/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -245,10 +245,10 @@ def regenerate_codebase(self) -> None: # Detect if we're running from frontend directory current_dir = os.getcwd() - is_frontend_dir = current_dir.endswith('ggml-remotingfrontend') + is_frontend_dir = current_dir.endswith('ggml-virtgpu') if is_frontend_dir: - # Running from ggml/src/ggml-remotingfrontend + # Running from ggml/src/ggml-virtgpu-apir logging.info("📍 Detected frontend directory execution") backend_base = Path("../ggml-remotingbackend") frontend_base = Path(".") @@ -257,7 +257,7 @@ def regenerate_codebase(self) -> None: logging.info("📍 Detected project root execution") base_path = self.config_data.get('base_path', 'ggml/src') backend_base = Path(base_path) / "ggml-remotingbackend" - frontend_base = Path(base_path) / "ggml-remotingfrontend" + frontend_base = Path(base_path) / "ggml-virtgpu" # Compute final file paths apir_backend_path = backend_base / "shared" / "apir_backend.gen.h" diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h b/ggml/src/ggml-virtgpu/virtgpu-apir.h similarity index 85% rename from ggml/src/ggml-remotingfrontend/virtgpu-apir.h rename to ggml/src/ggml-virtgpu/virtgpu-apir.h index 7f16844bf7b..238f960acd2 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-apir.h +++ b/ggml/src/ggml-virtgpu/virtgpu-apir.h @@ -1,4 +1,4 @@ -#include "../ggml-remotingbackend/shared/apir_backend.h" +#include "backend/shared/apir_backend.h" #include "ggml-alloc.h" #include "ggml-impl.h" #include "ggml.h" diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward-backend.cpp rename to ggml/src/ggml-virtgpu/virtgpu-forward-backend.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp similarity 
index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer-type.cpp rename to ggml/src/ggml-virtgpu/virtgpu-forward-buffer-type.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward-buffer.cpp rename to ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward-device.cpp rename to ggml/src/ggml-virtgpu/virtgpu-forward-device.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h similarity index 94% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h rename to ggml/src/ggml-virtgpu/virtgpu-forward-impl.h index 72e666e36a1..eea3e7e5a9b 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu-forward-impl.h +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-impl.h @@ -1,8 +1,8 @@ #include "virtgpu.h" #include "ggml-remoting.h" -#include "../ggml-remotingbackend/shared/apir_backend.h" -#include "../ggml-remotingbackend/shared/apir_cs_ggml.h" +#include "backend/shared/apir_backend.h" +#include "backend/shared/apir_cs_ggml.h" #include "ggml-backend-impl.h" diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h b/ggml/src/ggml-virtgpu/virtgpu-forward.gen.h similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-forward.gen.h rename to ggml/src/ggml-virtgpu/virtgpu-forward.gen.h diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp b/ggml/src/ggml-virtgpu/virtgpu-shm.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-shm.cpp rename to ggml/src/ggml-virtgpu/virtgpu-shm.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-shm.h b/ggml/src/ggml-virtgpu/virtgpu-shm.h similarity index 100% 
rename from ggml/src/ggml-remotingfrontend/virtgpu-shm.h rename to ggml/src/ggml-virtgpu/virtgpu-shm.h diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp b/ggml/src/ggml-virtgpu/virtgpu-utils.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-utils.cpp rename to ggml/src/ggml-virtgpu/virtgpu-utils.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu-utils.h b/ggml/src/ggml-virtgpu/virtgpu-utils.h similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu-utils.h rename to ggml/src/ggml-virtgpu/virtgpu-utils.h diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.cpp b/ggml/src/ggml-virtgpu/virtgpu.cpp similarity index 100% rename from ggml/src/ggml-remotingfrontend/virtgpu.cpp rename to ggml/src/ggml-virtgpu/virtgpu.cpp diff --git a/ggml/src/ggml-remotingfrontend/virtgpu.h b/ggml/src/ggml-virtgpu/virtgpu.h similarity index 95% rename from ggml/src/ggml-remotingfrontend/virtgpu.h rename to ggml/src/ggml-virtgpu/virtgpu.h index b3dccd4de19..d4bb42e20b2 100644 --- a/ggml/src/ggml-remotingfrontend/virtgpu.h +++ b/ggml/src/ggml-virtgpu/virtgpu.h @@ -4,8 +4,8 @@ #include "virtgpu-shm.h" #include "virtgpu-apir.h" -#include "../ggml-remotingbackend/shared/api_remoting.h" -#include "../ggml-remotingbackend/shared/apir_cs.h" +#include "backend/shared/api_remoting.h" +#include "backend/shared/apir_cs.h" #include #include From 4e3819958a44f4e392763ad17fb9a11c506e8867 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 22 Jan 2026 13:38:13 +0100 Subject: [PATCH 28/37] virtgpu-forward-buffer.cpp: remove dead code --- ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp index bea349a9032..3181e394407 100644 --- a/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp +++ b/ggml/src/ggml-virtgpu/virtgpu-forward-buffer.cpp @@ -62,17 +62,6 @@ void apir_buffer_set_tensor(virtgpu 
* gpu, return; } -#if false -void -apir_buffer_get_tensor(virtgpu *gpu, apir_buffer_context_t *buffer_context, - const ggml_tensor *tensor, void *data, size_t offset, size_t size) { - UNUSED(gpu); - UNUSED(tensor); - char *buffer_base_addr = (char *) buffer_context->shmem.mmap_ptr; - - memcpy(data, buffer_base_addr+offset, size); -} -#else void apir_buffer_get_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, const ggml_tensor * tensor, @@ -113,7 +102,6 @@ void apir_buffer_get_tensor(virtgpu * gpu, virtgpu_shmem_destroy(gpu, shmem); } } -#endif bool apir_buffer_cpy_tensor(virtgpu * gpu, apir_buffer_context_t * buffer_context, From 179a146721904ae00201f3c2004c42aa6fca9653 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Thu, 22 Jan 2026 23:41:20 +0100 Subject: [PATCH 29/37] finish updating the backend location --- ggml/src/ggml-virtgpu/backend/CMakeLists.txt | 6 +++--- ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml | 4 ++-- ggml/src/ggml-virtgpu/regenerate_remoting.py | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/CMakeLists.txt b/ggml/src/ggml-virtgpu/backend/CMakeLists.txt index 5234c555c9e..0b49c403b9a 100644 --- a/ggml/src/ggml-virtgpu/backend/CMakeLists.txt +++ b/ggml/src/ggml-virtgpu/backend/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_policy(SET CMP0114 NEW) message(STATUS "Enable the VirtGPU/Virglrenderer backend library") -ggml_add_backend_library(ggml-remotingbackend +ggml_add_backend_library(ggml-virtgpu-backend backend.cpp backend-dispatched.cpp backend-dispatched-backend.cpp @@ -15,7 +15,7 @@ ggml_add_backend_library(ggml-remotingbackend shared/apir_cs.h apir_cs_ggml-rpc-back.cpp) -target_compile_options(ggml-remotingbackend PRIVATE -std=c++20) +target_compile_options(ggml-virtgpu-backend PRIVATE -std=c++20) # Add include directory for ggml-backend-impl.h and other core headers -target_include_directories(ggml-remotingbackend PRIVATE ../..) 
+target_include_directories(ggml-virtgpu-backend PRIVATE ../..) diff --git a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml index fa7d57b0283..0b7cccfe9cf 100644 --- a/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml +++ b/ggml/src/ggml-virtgpu/ggmlremoting_functions.yaml @@ -8,8 +8,8 @@ config: # Header files to update files: - apir_backend_header: "ggml-remotingbackend/shared/apir_backend.gen.h" - backend_dispatched_header: "ggml-remotingbackend/backend-dispatched.gen.h" + apir_backend_header: "ggml-virtgpu-apir/backend/shared/apir_backend.gen.h" + backend_dispatched_header: "ggml-virtgpu-apir/backend/backend-dispatched.gen.h" virtgpu_forward_header: "ggml-virtgpu-apir/virtgpu-forward.gen.h" # Simplified function definitions with grouping and metadata combined diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index 927c0edccb3..a8f12251275 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -250,16 +250,15 @@ def regenerate_codebase(self) -> None: if is_frontend_dir: # Running from ggml/src/ggml-virtgpu-apir logging.info("📍 Detected frontend directory execution") - backend_base = Path("../ggml-remotingbackend") frontend_base = Path(".") else: # Running from project root (fallback to original behavior) logging.info("📍 Detected project root execution") base_path = self.config_data.get('base_path', 'ggml/src') - backend_base = Path(base_path) / "ggml-remotingbackend" frontend_base = Path(base_path) / "ggml-virtgpu" # Compute final file paths + backend_base = frontend_base / "backend" apir_backend_path = backend_base / "shared" / "apir_backend.gen.h" backend_dispatched_path = backend_base / "backend-dispatched.gen.h" virtgpu_forward_path = frontend_base / "virtgpu-forward.gen.h" From 9eb77dd356a089573ee68cb7d16014be9961c6ed Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 08:58:26 
+0100 Subject: [PATCH 30/37] ggml: src: ggml-virtgpu/regenerate_remoting: correctly use logging.exception arguments --- ggml/src/ggml-virtgpu/regenerate_remoting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index a8f12251275..fcc9881868d 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -64,10 +64,10 @@ def _format_file_with_clang_format(self, file_path: Path) -> bool: ) return True except subprocess.CalledProcessError as e: - logging.exception(f" ⚠️ Warning: clang-format failed for {file_path}", e) + logging.exception(f" ⚠️ Warning: clang-format failed for {file_path}") return False except Exception as e: - logging.exception(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}", e) + logging.exception(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}") return False def generate_enum_name(self, group_name: str, function_name: str) -> str: @@ -314,7 +314,7 @@ def main(): generator = RemotingCodebaseGenerator() generator.regenerate_codebase() except Exception as e: - logging.exception(f"❌ Error:", e) + logging.exception(f"❌ Error: {e}") exit(1) if __name__ == "__main__": From 66f75b31a92746420f480a4b2b162b598f6576f6 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 14 Jan 2026 18:31:59 +0100 Subject: [PATCH 31/37] appaise the linter --- ggml/src/ggml-virtgpu/regenerate_remoting.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index fcc9881868d..72360da97d7 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -64,10 +64,10 @@ def _format_file_with_clang_format(self, file_path: Path) -> bool: ) return True except subprocess.CalledProcessError as e: - logging.exception(f" ⚠️ Warning: 
clang-format failed for {file_path}") + logging.exception(f" ⚠️ clang-format failed for {file_path}") return False except Exception as e: - logging.exception(f" ⚠️ Warning: Unexpected error formatting {file_path}: {e}") + logging.exception(f" ⚠️ Unexpected error formatting {file_path}: {e}") return False def generate_enum_name(self, group_name: str, function_name: str) -> str: @@ -287,7 +287,7 @@ def regenerate_codebase(self) -> None: generated_files = [apir_backend_path, backend_dispatched_path, virtgpu_forward_path] if not self.clang_format_available: - logging.warn("\n⚠️ Warning: clang-format not found in PATH. Generated files will not be formatted." + logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted." " Install clang-format to enable automatic code formatting.") else: logging.info("\n🎨 Formatting files with clang-format...") @@ -295,7 +295,7 @@ def regenerate_codebase(self) -> None: if self._format_file_with_clang_format(file_path): logging.info(f" ✅ Formatted {file_path.name}") else: - logging.warn(f" ❌ Failed to format {file_path.name}") + logging.warning(f" ❌ Failed to format {file_path.name}") # Generate summary functions = self.get_enabled_functions() @@ -317,5 +317,6 @@ def main(): logging.exception(f"❌ Error: {e}") exit(1) + if __name__ == "__main__": main() From d2944e711877486f66e17dac054a571f64da3124 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Wed, 14 Jan 2026 18:31:59 +0100 Subject: [PATCH 32/37] appaise the linter --- ggml/src/ggml-virtgpu/regenerate_remoting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-virtgpu/regenerate_remoting.py b/ggml/src/ggml-virtgpu/regenerate_remoting.py index 72360da97d7..4174a24327f 100755 --- a/ggml/src/ggml-virtgpu/regenerate_remoting.py +++ b/ggml/src/ggml-virtgpu/regenerate_remoting.py @@ -63,7 +63,7 @@ def _format_file_with_clang_format(self, file_path: Path) -> bool: text=True ) return True - except 
subprocess.CalledProcessError as e: + except subprocess.CalledProcessError: logging.exception(f" ⚠️ clang-format failed for {file_path}") return False except Exception as e: @@ -288,7 +288,7 @@ def regenerate_codebase(self) -> None: if not self.clang_format_available: logging.warning("\n⚠️clang-format not found in PATH. Generated files will not be formatted." - " Install clang-format to enable automatic code formatting.") + " Install clang-format to enable automatic code formatting.") else: logging.info("\n🎨 Formatting files with clang-format...") for file_path in generated_files: From cf241f8b5ac1d56ba412bfc6a03a1a70ac08b523 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 14:59:43 +0100 Subject: [PATCH 33/37] fix the wrong indent style --- ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp | 2 +- ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h | 2 +- ggml/src/ggml-virtgpu/backend/backend.cpp | 4 ++-- ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp | 4 ++-- ggml/src/ggml-virtgpu/ggml-remoting.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp index 3161e91d222..a419f161f6a 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp @@ -29,7 +29,7 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) { } if (!reg->iface.get_device_count(reg)) { - GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__); + GGML_LOG_ERROR("%s: backend initialization failed: no device found\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } diff --git a/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h b/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h index 1972435a9f0..44b347f853f 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h +++ b/ggml/src/ggml-virtgpu/backend/backend-virgl-apir.h @@ -23,7 +23,7 @@ 
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct void apir_backend_deinit(uint32_t virgl_ctx_id); uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, virgl_apir_callbacks * virgl_cbs, - uint32_t cmd_type, + uint32_t cmd_type, char * dec_cur, const char * dec_end, char * enc_cur, diff --git a/ggml/src/ggml-virtgpu/backend/backend.cpp b/ggml/src/ggml-virtgpu/backend/backend.cpp index c270e9a5e43..fc677b0e655 100644 --- a/ggml/src/ggml-virtgpu/backend/backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend.cpp @@ -129,8 +129,8 @@ uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, }; virgl_apir_context ctx = { - .ctx_id = virgl_ctx_id, - .iface = virgl_cbs, + .ctx_id = virgl_ctx_id, + .iface = virgl_cbs, }; if (cmd_type >= APIR_BACKEND_DISPATCH_TABLE_COUNT) { diff --git a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp index 4dc960fa5b0..7f650659b8a 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-buffer-type.cpp @@ -71,8 +71,8 @@ static size_t ggml_backend_remoting_buffer_type_get_alloc_size(ggml_backend_buff virtgpu * gpu = BUFT_TO_GPU(buft); if (tensor->buffer == NULL - || !tensor->buffer->context - || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) { + || !tensor->buffer->context + || !buft->device->iface.supports_buft(buft->device, tensor->buffer->buft)) { return ggml_nbytes(tensor); } diff --git a/ggml/src/ggml-virtgpu/ggml-remoting.h b/ggml/src/ggml-virtgpu/ggml-remoting.h index 135f1c52970..36fc6b2a7bd 100644 --- a/ggml/src/ggml-virtgpu/ggml-remoting.h +++ b/ggml/src/ggml-virtgpu/ggml-remoting.h @@ -62,7 +62,7 @@ static inline apir_buffer_type_host_handle_t ggml_buffer_type_to_apir_handle(ggm static inline apir_buffer_host_handle_t ggml_buffer_to_apir_handle(ggml_backend_buffer_t buffer) { if (!buffer->context) { - GGML_ABORT("%s: no context available :/", __func__); + GGML_ABORT("%s: no 
context available :/", __func__); } return BUFFER_TO_HOST_HANDLE(buffer); } From 29acebe142b338b0daaab7a409ec588bf8be22b4 Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 15:00:19 +0100 Subject: [PATCH 34/37] ggml-virtgpu: use a mutex to protect the virtgpu initialization --- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index d54c2cbb39c..076300c94bf 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -8,16 +8,21 @@ static virtgpu * apir_initialize() { static virtgpu * apir_gpu_instance = NULL; static bool apir_initialized = false; - if (apir_initialized) { - return apir_gpu_instance; - } + { + static std::mutex mutex; + std::lock_guard lock(mutex); - apir_gpu_instance = create_virtgpu(); - if (!apir_gpu_instance) { - GGML_ABORT("failed to initialize the virtgpu"); - } + if (apir_initialized) { + return apir_gpu_instance; + } + + apir_gpu_instance = create_virtgpu(); + if (!apir_gpu_instance) { + GGML_ABORT("failed to initialize the virtgpu"); + } - apir_initialized = true; + apir_initialized = true; + } return apir_gpu_instance; } From 08e80802238128ead7df685ecd48692c6e39e2bc Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 15:23:26 +0100 Subject: [PATCH 35/37] ggml-virtgpu: fetch venus_hw.h from virglrenderer project --- ggml/src/ggml-virtgpu/CMakeLists.txt | 18 ++++++ ggml/src/ggml-virtgpu/include/venus_hw.h | 74 ------------------------ 2 files changed, 18 insertions(+), 74 deletions(-) delete mode 100644 ggml/src/ggml-virtgpu/include/venus_hw.h diff --git a/ggml/src/ggml-virtgpu/CMakeLists.txt b/ggml/src/ggml-virtgpu/CMakeLists.txt index 5359832dfcb..ee60c9d6621 100644 --- a/ggml/src/ggml-virtgpu/CMakeLists.txt +++ b/ggml/src/ggml-virtgpu/CMakeLists.txt @@ -1,8 +1,23 @@ 
cmake_minimum_required(VERSION 3.19) cmake_policy(SET CMP0114 NEW) +include(ExternalProject) + message(STATUS "Including the VirtGPU/Virglrenderer API Remoting") +# Download venus_hw.h from virglrenderer repository +ExternalProject_Add( + venus_hw_header + URL https://gitlab.freedesktop.org/virgl/virglrenderer/-/raw/virglrenderer-1.2.0/src/venus_hw.h + DOWNLOAD_NO_EXTRACT YES + DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include + DOWNLOAD_NAME venus_hw.h + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + LOG_DOWNLOAD ON +) + if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY") message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend libraryf") @@ -42,6 +57,9 @@ if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY") target_include_directories(ggml-virtgpu PUBLIC ./include) target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + # Ensure venus_hw.h is downloaded before building ggml-virtgpu + add_dependencies(ggml-virtgpu venus_hw_header) + target_compile_options(ggml-virtgpu PRIVATE -std=c++20) else() message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library") diff --git a/ggml/src/ggml-virtgpu/include/venus_hw.h b/ggml/src/ggml-virtgpu/include/venus_hw.h deleted file mode 100644 index 3ef774b8259..00000000000 --- a/ggml/src/ggml-virtgpu/include/venus_hw.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2020 Chromium - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or 
substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef VENUS_HW_H -#define VENUS_HW_H - -#include - -struct virgl_renderer_capset_venus { - uint32_t wire_format_version; - uint32_t vk_xml_version; - uint32_t vk_ext_command_serialization_spec_version; - uint32_t vk_mesa_venus_protocol_spec_version; - - /* This flag indicates render server config, and will be needed until drm - * virtio-gpu blob mem gets fixed to attach_resource before resource_map. - */ - uint32_t supports_blob_id_0; - - /* Extension number N, where N is defined by the Vulkan spec, corresponds - * to bit [N / 32] & (1 << N % 32). The below mask1 covers the first 1023 - * Vulkan extensions (numbered from 1 to 1023). - * - * Bit (mask1[0] & 0x1) is used for backward compatibility purpose. When - * that bit is set, the extension mask(s) are valid. Otherwise, all the - * extensions are assumed to be supported by the renderer side protocol. - */ - uint32_t vk_extension_mask1[32]; - - /* The single-threaded renderer cannot afford potential blocking calls. It - * also leads to GPU lost if the wait depends on a following command. This - * capset allows such blocking calls to passthrough from the clients, and - * shifts the responsibilities to the client drivers. - */ - uint32_t allow_vk_wait_syncs; - - /* This flag indicates that the renderer supports multiple fencing - * timelines. 
The client driver is expected to associate each VkQueue with - * one of these timelines at queue creation by binding it with an unused - * ring_idx. Queues created without a ring_idx binding are associated to a - * shared legacy timeline. The special ring_idx==0 is reserved for CPU - * fences that are signaled by the renderer immediately upon consumption of - * the associated renderer submission. - */ - uint32_t supports_multiple_timelines; - - /* This flag indicates to the guest that hypervisor does not support memory - * pages injections and blob allocations must be done by guest from the - * dedicated heap (Host visible memory). - */ - uint32_t use_guest_vram; -}; - -#endif /* VENUS_HW_H */ From e38e146160e82f68776187d8797704998c1c10ff Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 15:55:35 +0100 Subject: [PATCH 36/37] fix the wrong indent style --- .../backend/backend-dispatched.cpp | 2 +- ggml/src/ggml-virtgpu/backend/backend.cpp | 2 +- ggml/src/ggml-virtgpu/ggml-backend-reg.cpp | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp index a419f161f6a..51d445725f0 100644 --- a/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend-dispatched.cpp @@ -36,7 +36,7 @@ uint32_t backend_dispatch_initialize(void * ggml_backend_reg_fct_p) { dev = reg->iface.get_device(reg, 0); if (!dev) { - GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__); + GGML_LOG_ERROR("%s: backend initialization failed: no device received\n", __func__); return APIR_BACKEND_INITIALIZE_NO_DEVICE; } diff --git a/ggml/src/ggml-virtgpu/backend/backend.cpp b/ggml/src/ggml-virtgpu/backend/backend.cpp index fc677b0e655..95d602ed603 100644 --- a/ggml/src/ggml-virtgpu/backend/backend.cpp +++ b/ggml/src/ggml-virtgpu/backend/backend.cpp @@ -109,7 +109,7 @@ 
ApirLoadLibraryReturnCode apir_backend_initialize(uint32_t virgl_ctx_id, struct uint32_t apir_backend_dispatcher(uint32_t virgl_ctx_id, virgl_apir_callbacks * virgl_cbs, - uint32_t cmd_type, + uint32_t cmd_type, char * dec_cur, const char * dec_end, char * enc_cur, diff --git a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp index 076300c94bf..c46cf51c022 100644 --- a/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp +++ b/ggml/src/ggml-virtgpu/ggml-backend-reg.cpp @@ -9,19 +9,19 @@ static virtgpu * apir_initialize() { static bool apir_initialized = false; { - static std::mutex mutex; + static std::mutex mutex; std::lock_guard lock(mutex); - if (apir_initialized) { - return apir_gpu_instance; - } + if (apir_initialized) { + return apir_gpu_instance; + } - apir_gpu_instance = create_virtgpu(); - if (!apir_gpu_instance) { - GGML_ABORT("failed to initialize the virtgpu"); - } + apir_gpu_instance = create_virtgpu(); + if (!apir_gpu_instance) { + GGML_ABORT("failed to initialize the virtgpu"); + } - apir_initialized = true; + apir_initialized = true; } return apir_gpu_instance; From 4cac29c6a9c3a8945f6f9f026f7d292ae1967aef Mon Sep 17 00:00:00 2001 From: Kevin Pouget Date: Mon, 26 Jan 2026 19:56:14 +0100 Subject: [PATCH 37/37] fix typo --- ggml/src/ggml-virtgpu/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-virtgpu/CMakeLists.txt b/ggml/src/ggml-virtgpu/CMakeLists.txt index ee60c9d6621..e6b020beb5b 100644 --- a/ggml/src/ggml-virtgpu/CMakeLists.txt +++ b/ggml/src/ggml-virtgpu/CMakeLists.txt @@ -19,7 +19,7 @@ ExternalProject_Add( ) if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY") - message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend libraryf") + message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend library") find_package(PkgConfig REQUIRED) pkg_check_modules(DRM REQUIRED libdrm)