Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
dd34429
ggml: add the ggml-remoting frontend/backend to the build system
kpouget Jan 9, 2026
93245a3
ggml-remotingfrontend: guest-side backend for API Remoting acceleration
kpouget Jan 9, 2026
8a8d067
ggml-remotingbackend: host-side backend for Virglrenderer APIR component
kpouget Jan 9, 2026
61715cd
ggml: disable Vulkan backend loading with GGML_DISABLE_VULKAN
kpouget Jan 9, 2026
0464ca3
CMakePresets.json: don't expose presets for the API Remoting backends
kpouget Jan 12, 2026
350e94c
backend-utils.cpp: remove unused file
kpouget Jan 12, 2026
dd518ef
Update the indentation with clang-format
kpouget Jan 12, 2026
a37f379
Remove FATAL errors in the backend
kpouget Jan 12, 2026
cf88b8f
remove structs
kpouget Jan 12, 2026
3a9fcfb
Update regenerate_remoting.py to launch clang-format
kpouget Jan 12, 2026
74f0a85
reformat with clang-format
kpouget Jan 12, 2026
1a45049
more cleanups
kpouget Jan 12, 2026
2397099
Remove extra header files
kpouget Jan 12, 2026
700884b
Make sure that the LOG messages end with EOL
kpouget Jan 13, 2026
d160c79
Cleanup the CMakeLists
kpouget Jan 13, 2026
e9a469b
Use uint64_t instead of long long
kpouget Jan 13, 2026
ab4d5cc
use (full) upper case for constants
kpouget Jan 13, 2026
b522bfe
ggml-remoting-frontend.cpp: remove unused file
kpouget Jan 13, 2026
18ef30d
regenerate_remoting: remove unnecessary import
kpouget Jan 13, 2026
e0bb437
regenerate_remoting: appease the linter
kpouget Jan 13, 2026
ba48cfb
backend.cpp: use the right variable in error message
kpouget Jan 14, 2026
9182516
ggml-backend-reg: fix typo
kpouget Jan 14, 2026
7ec38db
ggml_backend_remoting_buffer_type_get_alloc_size: validate that the b…
kpouget Jan 16, 2026
f1ec1be
ggml-backend-reg.cpp: define the GGML_BACKEND_DL_IMPL
kpouget Jan 16, 2026
119bdec
Update to allow dynamic configuration from the hypervisor
kpouget Jan 16, 2026
8ff5522
remotingbackend: Simplify the initialization process
kpouget Jan 20, 2026
2401f63
Rename the GGML backend
kpouget Jan 20, 2026
4e38199
virtgpu-forward-buffer.cpp: remove dead code
kpouget Jan 22, 2026
179a146
finish updating the backend location
kpouget Jan 22, 2026
9eb77dd
ggml: src: ggml-virtgpu/regenerate_remoting: correctly use logging.ex…
kpouget Jan 26, 2026
66f75b3
appease the linter
kpouget Jan 14, 2026
d2944e7
appease the linter
kpouget Jan 14, 2026
cf241f8
fix the wrong indent style
kpouget Jan 26, 2026
29acebe
ggml-virtgpu: use a mutex to protect the virtgpu initialization
kpouget Jan 26, 2026
08e8080
ggml-virtgpu: fetch venus_hw.h from virglrenderer project
kpouget Jan 26, 2026
e38e146
fix the wrong indent style
kpouget Jan 26, 2026
4cac29c
fix typo
kpouget Jan 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
/ggml/src/ggml-rpc/ @rgerganov
/ggml/src/ggml-threading.* @ggerganov
/ggml/src/ggml-vulkan/ @0cc4m
/ggml/src/ggml-virtgpu/ @kpouget
/ggml/src/ggml-webgpu/ @reeselevine
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
/ggml/src/ggml.c @ggerganov
Expand Down
3 changes: 3 additions & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)
option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_VIRTGPU "ggml: use the VirtGPU/Virglrenderer API Remoting frontend" OFF)
option(GGML_VIRTGPU_BACKEND "ggml: build the VirtGPU/Virglrenderer API Remoting backend" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
Expand Down Expand Up @@ -320,6 +322,7 @@ set(GGML_PUBLIC_HEADERS
include/ggml-opt.h
include/ggml-metal.h
include/ggml-rpc.h
include/ggml-virtgpu.h
include/ggml-sycl.h
include/ggml-vulkan.h
include/ggml-webgpu.h
Expand Down
16 changes: 16 additions & 0 deletions ggml/include/ggml-virtgpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#ifdef __cplusplus
extern "C" {
#endif

// Display name of the VirtGPU/Virglrenderer API Remoting frontend backend.
#define GGML_REMOTING_FRONTEND_NAME "RemotingFrontend"

// Entry point returning the backend registry descriptor for the VirtGPU
// frontend. Declared with (void): inside `extern "C"`, an empty parameter
// list means "unspecified arguments" to C callers, not "no arguments".
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_virtgpu_reg(void);

#ifdef __cplusplus
}
#endif
1 change: 1 addition & 0 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ ggml_add_backend(HIP)
ggml_add_backend(METAL)
ggml_add_backend(MUSA)
ggml_add_backend(RPC)
ggml_add_backend(VirtGPU)
ggml_add_backend(SYCL)
ggml_add_backend(Vulkan)
ggml_add_backend(WebGPU)
Expand Down
14 changes: 14 additions & 0 deletions ggml/src/ggml-backend-reg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
#include "ggml-rpc.h"
#endif

#ifdef GGML_USE_VIRTGPU_FRONTEND
#include "ggml-virtgpu.h"
#endif

#ifdef GGML_USE_CANN
#include "ggml-cann.h"
#endif
Expand Down Expand Up @@ -196,14 +200,23 @@ struct ggml_backend_registry {
register_backend(ggml_backend_sycl_reg());
#endif
#ifdef GGML_USE_VULKAN
// Add runtime disable check
if (getenv("GGML_DISABLE_VULKAN") == nullptr) {
register_backend(ggml_backend_vk_reg());
} else {
GGML_LOG_DEBUG("Vulkan backend disabled by GGML_DISABLE_VULKAN environment variable\n");
}
#endif
#ifdef GGML_USE_WEBGPU
register_backend(ggml_backend_webgpu_reg());
#endif
#ifdef GGML_USE_ZDNN
register_backend(ggml_backend_zdnn_reg());
#endif
#ifdef GGML_USE_VIRTGPU_FRONTEND
register_backend(ggml_backend_virtgpu_reg());
#endif

#ifdef GGML_USE_OPENCL
register_backend(ggml_backend_opencl_reg());
#endif
Expand Down Expand Up @@ -620,6 +633,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
ggml_backend_load_best("rpc", silent, dir_path);
ggml_backend_load_best("sycl", silent, dir_path);
ggml_backend_load_best("vulkan", silent, dir_path);
ggml_backend_load_best("virtgpu", silent, dir_path);
ggml_backend_load_best("opencl", silent, dir_path);
ggml_backend_load_best("hexagon", silent, dir_path);
ggml_backend_load_best("musa", silent, dir_path);
Expand Down
70 changes: 70 additions & 0 deletions ggml/src/ggml-virtgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
cmake_minimum_required(VERSION 3.19)
# CMP0114 NEW: ExternalProject steps get their own dependable targets
cmake_policy(SET CMP0114 NEW)

include(ExternalProject)

message(STATUS "Including the VirtGPU/Virglrenderer API Remoting")

# Download venus_hw.h from virglrenderer repository
# (pinned to the virglrenderer-1.2.0 tag; only the header is fetched,
# nothing is configured, built, or installed)
ExternalProject_Add(
    venus_hw_header
    URL https://gitlab.freedesktop.org/virgl/virglrenderer/-/raw/virglrenderer-1.2.0/src/venus_hw.h
    DOWNLOAD_NO_EXTRACT YES
    DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include
    DOWNLOAD_NAME venus_hw.h
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    INSTALL_COMMAND ""
    LOG_DOWNLOAD ON
)

# GGML_VIRTGPU_BACKEND selects what to build:
#   OFF  -> frontend only; ONLY -> backend only; otherwise both
if (NOT GGML_VIRTGPU_BACKEND STREQUAL "ONLY")
    message(STATUS "Enable the VirtGPU/Virglrenderer API Remoting frontend library")

    # the guest-side frontend talks to the virtio-gpu device through libdrm
    find_package(PkgConfig REQUIRED)
    pkg_check_modules(DRM REQUIRED libdrm)
    if (NOT GGML_BACKEND_DL)
        # cannot simply use USE_VIRTGPU, as in the 'else()' case the
        # frontend isn't compiled
        target_compile_definitions(ggml PUBLIC "GGML_USE_VIRTGPU_FRONTEND")
    endif()

    ggml_add_backend_library(ggml-virtgpu
        ggml-backend-buffer.cpp
        ggml-backend.cpp
        ggml-backend-device.cpp
        ggml-backend-reg.cpp
        ggml-backend-buffer-type.cpp
        virtgpu-apir.h
        virtgpu-forward.gen.h
        virtgpu.cpp
        virtgpu-shm.cpp
        virtgpu-utils.cpp
        virtgpu-forward-device.cpp
        virtgpu-forward-buffer-type.cpp
        virtgpu-forward-buffer.cpp
        virtgpu-forward-backend.cpp
        virtgpu-forward-impl.h
        apir_cs_ggml-rpc-front.cpp
        ../../include/ggml-virtgpu.h)

    # NOTE(review): hard-coded absolute path; presumably redundant with
    # ${DRM_INCLUDE_DIRS} below — confirm and consider removing
    target_include_directories(ggml-virtgpu PUBLIC /usr/include/libdrm/)

    target_link_libraries(ggml-virtgpu PUBLIC ${DRM_LIBRARIES})
    target_include_directories(ggml-virtgpu PUBLIC ${DRM_INCLUDE_DIRS})
    target_compile_options(ggml-virtgpu PUBLIC ${DRM_CFLAGS_OTHER})

    # ./include receives the downloaded venus_hw.h
    target_include_directories(ggml-virtgpu PUBLIC ./include)
    target_include_directories(ggml-virtgpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR})

    # Ensure venus_hw.h is downloaded before building ggml-virtgpu
    add_dependencies(ggml-virtgpu venus_hw_header)

    target_compile_options(ggml-virtgpu PRIVATE -std=c++20)
else()
    message(STATUS "Not building the VirtGPU/Virglrenderer API Remoting frontend library")
endif()

if (NOT GGML_VIRTGPU_BACKEND STREQUAL "OFF")
    add_subdirectory("backend")
endif()
87 changes: 87 additions & 0 deletions ggml/src/ggml-virtgpu/apir_cs_ggml-rpc-front.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include "backend/shared/apir_cs_rpc.h"
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "ggml-remoting.h"

#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Flatten a ggml_tensor into the wire representation used by the API
// Remoting protocol. Guest pointers (tensor, srcs, view_src) are encoded
// as opaque 64-bit ids; tensor->data is encoded as an offset relative to
// its buffer's base address.
apir_rpc_tensor apir_serialize_tensor(const ggml_tensor * tensor) {
    apir_rpc_tensor out;

    out.id   = reinterpret_cast<uint64_t>(tensor);
    out.type = tensor->type;

    // 0 marks "no buffer" on the wire
    out.buffer = tensor->buffer ? BUFFER_TO_HOST_HANDLE(tensor->buffer) : 0;

    for (uint32_t dim = 0; dim < GGML_MAX_DIMS; dim++) {
        out.ne[dim] = tensor->ne[dim];
        out.nb[dim] = tensor->nb[dim];
    }

    out.op = tensor->op;
    for (uint32_t idx = 0; idx < GGML_MAX_OP_PARAMS / sizeof(int32_t); idx++) {
        out.op_params[idx] = tensor->op_params[idx];
    }
    out.flags = tensor->flags;

    for (uint32_t src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
        out.src[src_idx] = reinterpret_cast<uint64_t>(tensor->src[src_idx]);
    }
    out.view_src  = reinterpret_cast<uint64_t>(tensor->view_src);
    out.view_offs = tensor->view_offs;

    out.data = reinterpret_cast<uint64_t>(tensor->data);
    if (tensor->data) {
        if (!tensor->buffer) {
            GGML_ABORT("tensor has data but not buffer");
        }
        // tensor->data is serialized as an offset to the buffer base address
        out.data -= reinterpret_cast<uint64_t>(BUFFER_TO_GGML_CONTEXT(tensor->buffer)->base);
    }

    snprintf(out.name, GGML_MAX_NAME, "%s", tensor->name);
    return out;
}

// Depth-first walk of a tensor's dependency graph: serialize every
// reachable tensor exactly once, sources and view target before the
// tensor itself.
void apir_add_tensor(ggml_tensor * tensor,
                     std::vector<apir_rpc_tensor> & tensors,
                     std::unordered_set<ggml_tensor *> & visited) {
    if (tensor == nullptr) {
        return;
    }
    // insert() returns {iterator, false} when the tensor was already visited
    if (!visited.insert(tensor).second) {
        return;
    }
    for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
        apir_add_tensor(tensor->src[src_idx], tensors, visited);
    }
    apir_add_tensor(tensor->view_src, tensors, visited);
    tensors.push_back(apir_serialize_tensor(tensor));
}

// Serialize a compute graph into `output` for transmission to the host.
//
// serialization format:
// | n_nodes (4 bytes) | nodes (n_nodes * sizeof(uint64_t)) | n_tensors (4 bytes) | tensors (n_tensors * sizeof(apir_rpc_tensor)) |
void apir_serialize_graph(const ggml_cgraph * cgraph, std::vector<uint8_t> & output) {
    uint32_t n_nodes = cgraph->n_nodes;
    std::vector<apir_rpc_tensor> tensors;
    std::unordered_set<ggml_tensor *> visited;
    // collect every tensor reachable from the graph nodes (deduplicated)
    for (uint32_t i = 0; i < n_nodes; i++) {
        apir_add_tensor(cgraph->nodes[i], tensors, visited);
    }

    uint32_t n_tensors = tensors.size();
    // compute section offsets in size_t: the previous `int` total could
    // narrow/overflow for large graphs
    const size_t nodes_offset    = sizeof(n_nodes);
    const size_t ntensors_offset = nodes_offset + (size_t) n_nodes * sizeof(uint64_t);
    const size_t tensors_offset  = ntensors_offset + sizeof(n_tensors);
    const size_t output_size     = tensors_offset + (size_t) n_tensors * sizeof(apir_rpc_tensor);
    output.resize(output_size, 0);

    memcpy(output.data(), &n_nodes, sizeof(n_nodes));
    // node ids are the guest-side tensor pointers, encoded as uint64_t
    for (uint32_t i = 0; i < n_nodes; i++) {
        memcpy(output.data() + nodes_offset + i * sizeof(uint64_t), &cgraph->nodes[i], sizeof(uint64_t));
    }
    // write via memcpy instead of casting output.data()+offset to typed
    // pointers: the offsets are not guaranteed to be suitably aligned
    memcpy(output.data() + ntensors_offset, &n_tensors, sizeof(n_tensors));
    memcpy(output.data() + tensors_offset, tensors.data(), n_tensors * sizeof(apir_rpc_tensor));
}
21 changes: 21 additions & 0 deletions ggml/src/ggml-virtgpu/backend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.19)
# CMP0114 NEW: ExternalProject steps get their own dependable targets
cmake_policy(SET CMP0114 NEW)

message(STATUS "Enable the VirtGPU/Virglrenderer backend library")

# Host-side backend: dispatches the remoted API calls received from the
# guest frontend (see the sibling frontend CMakeLists one level up).
ggml_add_backend_library(ggml-virtgpu-backend
    backend.cpp
    backend-dispatched.cpp
    backend-dispatched-backend.cpp
    backend-dispatched-device.cpp
    backend-dispatched-buffer.cpp
    backend-dispatched-buffer-type.cpp
    shared/api_remoting.h
    shared/apir_backend.h
    shared/apir_cs.h
    apir_cs_ggml-rpc-back.cpp)

target_compile_options(ggml-virtgpu-backend PRIVATE -std=c++20)

# Add include directory for ggml-backend-impl.h and other core headers
target_include_directories(ggml-virtgpu-backend PRIVATE ../..)
115 changes: 115 additions & 0 deletions ggml/src/ggml-virtgpu/backend/apir_cs_ggml-rpc-back.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#include "ggml-backend-impl.h"
#include "ggml-impl.h"
#include "shared/apir_cs_rpc.h"

#include <cinttypes>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Buffers the host side currently knows about; used to validate handles
// arriving from the guest before they are dereferenced.
// NOTE(review): no lock protects this set — presumably single-threaded
// dispatch; confirm against the dispatcher.
std::unordered_set<ggml_backend_buffer_t> backend_buffers;

// Start tracking a freshly created backend buffer.
void apir_track_backend_buffer(ggml_backend_buffer_t buffer) {
    backend_buffers.insert(buffer);
}

// Stop tracking a buffer; returns false when it was not being tracked.
bool apir_untrack_backend_buffer(ggml_backend_buffer_t buffer) {
    // erase(key) reports how many elements were removed (0 or 1 for a set)
    return backend_buffers.erase(buffer) != 0;
}

// Return a snapshot (by-value copy) of the tracked buffers.
std::unordered_set<ggml_backend_buffer_t> apir_get_track_backend_buffers() {
    auto snapshot = backend_buffers;
    return snapshot;
}

// Rebuild a ggml_tensor in `ctx` from its wire representation.
// The buffer handle is validated against the tracked host buffers, and
// tensor->data — serialized as an offset to the buffer base — is bounds-
// checked against the buffer before being turned back into a pointer.
ggml_tensor * apir_deserialize_tensor(ggml_context * ctx, const apir_rpc_tensor * tensor) {
    ggml_tensor * result =
        ggml_new_tensor_4d(ctx, (ggml_type) tensor->type, tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
    for (uint32_t i = 0; i < GGML_MAX_DIMS; i++) {
        result->nb[i] = tensor->nb[i];
    }
    result->buffer = reinterpret_cast<ggml_backend_buffer_t>(tensor->buffer);
    if (result->buffer && backend_buffers.find(result->buffer) == backend_buffers.end()) {
        // log through the ggml logging facility rather than raw printf so
        // the message reaches the registered log callback (and stderr)
        GGML_LOG_WARN("HOST BUFFER NOT FOUND | %p\n", (void *) result->buffer);
        result->buffer = nullptr;
    }

    uint64_t tensor_data = tensor->data;
    if (result->buffer) {
        // require that the tensor data does not go beyond the buffer end
        uint64_t tensor_size = (uint64_t) ggml_nbytes(result);
        uint64_t buffer_start = (uint64_t) ggml_backend_buffer_get_base(result->buffer);
        uint64_t buffer_size = (uint64_t) ggml_backend_buffer_get_size(result->buffer);

        // tensor->data is serialized as an offset to the buffer base address
        tensor_data += buffer_start;

        GGML_ASSERT(tensor_data + tensor_size >= tensor_data); // check for overflow
        GGML_ASSERT(tensor_data >= buffer_start && tensor_data + tensor_size <= buffer_start + buffer_size);
    }

    result->op = (ggml_op) tensor->op;
    for (uint32_t i = 0; i < GGML_MAX_OP_PARAMS / sizeof(int32_t); i++) {
        result->op_params[i] = tensor->op_params[i];
    }
    result->flags = tensor->flags;
    result->data = reinterpret_cast<void *>(tensor_data);
    ggml_set_name(result, tensor->name);
    return result;
}

// Recursively materialize the tensor with the given wire id (0 == null).
// Results are memoized in tensor_map so tensors shared between nodes are
// deserialized exactly once and pointer identity is preserved.
ggml_tensor * apir_create_node(uint64_t id,
                               ggml_context * ctx,
                               const std::unordered_map<uint64_t, const apir_rpc_tensor *> & tensor_ptrs,
                               std::unordered_map<uint64_t, ggml_tensor *> & tensor_map) {
    if (id == 0) {
        return nullptr;
    }
    // single lookup instead of find-then-operator[]
    auto cached = tensor_map.find(id);
    if (cached != tensor_map.end()) {
        return cached->second;
    }
    const apir_rpc_tensor * tensor = tensor_ptrs.at(id);
    ggml_tensor * result = apir_deserialize_tensor(ctx, tensor);
    if (result == nullptr) {
        return nullptr;
    }
    tensor_map[id] = result;
    for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
        result->src[src_idx] = apir_create_node(tensor->src[src_idx], ctx, tensor_ptrs, tensor_map);
    }
    result->view_src  = apir_create_node(tensor->view_src, ctx, tensor_ptrs, tensor_map);
    result->view_offs = tensor->view_offs;
    return result;
}

// Rebuild a compute graph from its serialized form (see the frontend's
// apir_serialize_graph for the wire layout).
//
// NOTE(review): the ggml_context allocated here owns the graph and every
// deserialized tensor; it is intentionally not freed in this function —
// confirm the caller releases it once the graph has been consumed.
ggml_cgraph * apir_deserialize_graph(uint32_t n_nodes,
                                     uint32_t n_tensors,
                                     const apir_rpc_tensor * tensors,
                                     const uint64_t * nodes) {
    // no_alloc context: only tensor/graph metadata lives here, data
    // pointers are patched in by apir_deserialize_tensor
    size_t buf_size = ggml_tensor_overhead() * (n_nodes + n_tensors) + ggml_graph_overhead_custom(n_nodes, false);
    ggml_init_params params = {
        /*.mem_size =*/buf_size,
        /*.mem_buffer =*/NULL,
        /*.no_alloc =*/true,
    };
    ggml_context * ctx = ggml_init(params);
    ggml_cgraph * graph = ggml_new_graph_custom(ctx, n_nodes, false);
    graph->n_nodes = n_nodes;
    // index the serialized tensors by their wire id
    std::unordered_map<uint64_t, const apir_rpc_tensor *> tensor_ptrs;
    for (uint32_t i = 0; i < n_tensors; i++) {
        tensor_ptrs[tensors[i].id] = &tensors[i];
    }
    std::unordered_map<uint64_t, ggml_tensor *> tensor_map;
    for (uint32_t i = 0; i < n_nodes; i++) {
        // ids are serialized as uint64_t; read them into the matching
        // unsigned type (previously int64_t, a signed/unsigned mismatch
        // with apir_create_node's parameter)
        uint64_t id;
        memcpy(&id, &nodes[i], sizeof(id));
        graph->nodes[i] = apir_create_node(id, ctx, tensor_ptrs, tensor_map);
    }

    return graph;
}
Loading
Loading