Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ option(GGML_RPC "ggml: use RPC"
option(GGML_SYCL "ggml: use SYCL" OFF)
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
option(GGML_SYCL_SUPPORT_LEVEL_ZERO "ggml: use Level Zero API in SYCL backend" ON)
option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON)
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
"ggml: sycl target device")
Expand Down
23 changes: 23 additions & 0 deletions ggml/src/ggml-sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,12 @@ detect_and_find_package(IntelSYCL)
if (IntelSYCL_FOUND)
# Use oneAPI CMake when possible
target_link_libraries(ggml-sycl PRIVATE IntelSYCL::SYCL_CXX)
# Add compatibility shim for work_group_static.hpp if not present in this oneAPI version
get_target_property(_sycl_inc IntelSYCL::SYCL_CXX INTERFACE_INCLUDE_DIRECTORIES)
if(NOT EXISTS "${_sycl_inc}/sycl/ext/oneapi/work_group_static.hpp")
message(STATUS "work_group_static.hpp not found in oneAPI, using compatibility shim")
target_include_directories(ggml-sycl BEFORE PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/sycl_compat")
endif()
else()
# Fallback to the simplest way of enabling SYCL when using intel/llvm nightly for instance
target_compile_options(ggml-sycl PRIVATE "-fsycl")
Expand All @@ -93,6 +99,23 @@ endif()

target_compile_options(ggml-sycl PRIVATE "-Wno-narrowing")

# Level Zero: direct device memory allocation bypasses xe/TTM staging overhead
message(STATUS "GGML_SYCL_SUPPORT_LEVEL_ZERO ${GGML_SYCL_SUPPORT_LEVEL_ZERO}")
if (GGML_SYCL_SUPPORT_LEVEL_ZERO)
find_path(LEVEL_ZERO_INCLUDE_DIR level_zero/ze_api.h
HINTS ${ONEAPI_ROOT}/include /usr/include)
find_library(ZE_LOADER_LIB ze_loader
HINTS ${ONEAPI_ROOT}/lib /usr/lib /usr/lib64 ENV LD_LIBRARY_PATH)
if(ZE_LOADER_LIB AND LEVEL_ZERO_INCLUDE_DIR)
target_link_libraries(ggml-sycl PRIVATE ${ZE_LOADER_LIB})
target_compile_definitions(ggml-sycl PRIVATE GGML_SYCL_SUPPORT_LEVEL_ZERO)
message(STATUS "Level Zero loader: ${ZE_LOADER_LIB}")
message(STATUS "Level Zero headers: ${LEVEL_ZERO_INCLUDE_DIR}")
else()
message(WARNING "Level Zero not found — L0 memory path disabled (loader=${ZE_LOADER_LIB}, inc=${LEVEL_ZERO_INCLUDE_DIR})")
endif()
endif()

# Link against oneDNN
set(GGML_SYCL_DNNL 0)
if(GGML_SYCL_DNN)
Expand Down
54 changes: 52 additions & 2 deletions ggml/src/ggml-sycl/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

#include "common.hpp"

#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
#include <sycl/backend.hpp>
#include <level_zero/ze_api.h>
#endif

#include "ggml-backend-impl.h"
#include "ggml-impl.h"

Expand Down Expand Up @@ -66,6 +71,52 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
return sycl_down_blk_size;
}

// ggml_sycl_malloc_device: allocates device memory via Level Zero (if available)
// to avoid the xe driver's TTM staging path triggered by sycl::malloc_device.
// sycl::malloc_device creates a 1:1 host mirror of every VRAM allocation via
// xe_gem_prime_export; zeMemAllocDevice uses SVM/P2P path with ~8 MiB overhead.
void * ggml_sycl_malloc_device(size_t size, sycl::queue &q) {
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
if (g_ggml_sycl_enable_level_zero) {
auto ze_ctx = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_context());
auto ze_dev = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_device());

ze_relaxed_allocation_limits_exp_desc_t relaxed = {
ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC,
nullptr,
ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE
};
ze_device_mem_alloc_desc_t alloc_desc = {
ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC,
&relaxed,
0, 0
};
void *ptr = nullptr;
ze_result_t r = zeMemAllocDevice(ze_ctx, &alloc_desc, size, 64, ze_dev, &ptr);
if (r == ZE_RESULT_SUCCESS && ptr) {
return ptr;
}
GGML_LOG_WARN("zeMemAllocDevice failed (0x%x), falling back to sycl::malloc_device\n", r);
}
#endif
return sycl::malloc_device(size, q);
}

void ggml_sycl_free_device(void *ptr, sycl::queue &q) {
if (!ptr) return;
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
if (g_ggml_sycl_enable_level_zero) {
sycl::usm::alloc type = sycl::get_pointer_type(ptr, q.get_context());
if (type == sycl::usm::alloc::unknown) {
auto ze_ctx = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_context());
zeMemFree(ze_ctx, ptr);
return;
}
}
#endif
sycl::free(ptr, q);
}

void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams) {
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
Expand All @@ -75,8 +126,7 @@ void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> str
}
if (extra->data_device[i] != nullptr && streams.size()>0) {
ggml_sycl_set_device(i);
SYCL_CHECK(
CHECK_TRY_ERROR(sycl::free(extra->data_device[i], *(streams[i]))));
SYCL_CHECK(CHECK_TRY_ERROR(ggml_sycl_free_device(extra->data_device[i], *(streams[i]))));
}
}
delete extra;
Expand Down
9 changes: 9 additions & 0 deletions ggml/src/ggml-sycl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,15 @@ struct ggml_tensor_extra_gpu {
optimize_feature optimized_feature;
};

// Level Zero direct device memory helpers.
// ggml_sycl_malloc_device uses zeMemAllocDevice (bypasses TTM staging in xe driver).
// ggml_sycl_free_device matches the allocator used by ggml_sycl_malloc_device.
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
extern int g_ggml_sycl_enable_level_zero;
#endif
void * ggml_sycl_malloc_device(size_t size, sycl::queue &q);
void ggml_sycl_free_device(void *ptr, sycl::queue &q);

void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector<queue_ptr> streams={});

namespace sycl_ex = sycl::ext::oneapi::experimental;
Expand Down
4 changes: 2 additions & 2 deletions ggml/src/ggml-sycl/dpct/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,13 +684,13 @@ namespace dpct
"use total memory as free memory";
#if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
if (!has(sycl::aspect::ext_intel_free_memory)) {
std::cerr << warning_info << std::endl;
free_memory = total_memory;
} else {
free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
}
#else
std::cerr << warning_info << std::endl;
static bool warned = false;
if (!warned) { std::cerr << warning_info << std::endl; warned = true; }
free_memory = total_memory;
#if defined(_MSC_VER) && !defined(__clang__)
#pragma message("Querying the number of bytes of free memory is not supported")
Expand Down
Loading