Skip to content

Commit

Permalink
[SYCL] Add support for SYCL Nvidia target (ggerganov#5738)
Browse files Browse the repository at this point in the history
* Add support for nvidia target in CMake

* Update sycl read-me for Nvidia target

* Fix errors
  • Loading branch information
AidanBeltonS authored and NeoZhangJianyu committed Mar 12, 2024
1 parent 98a4398 commit 1c4007c
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 1 deletion.
14 changes: 13 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ option(LLAMA_MPI "llama: use MPI"
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
option(LLAMA_SYCL "llama: use SYCL" OFF)
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)

option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
Expand Down Expand Up @@ -534,6 +535,10 @@ if (LLAMA_HIPBLAS)
endif()

if (LLAMA_SYCL)
if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$")
message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA")
endif()

if ( NOT DEFINED ENV{ONEAPI_ROOT})
message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh")
endif()
Expand All @@ -555,14 +560,21 @@ if (LLAMA_SYCL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif()

set(GGML_HEADERS_SYCL ggml-sycl.h)
set(GGML_SOURCES_SYCL ggml-sycl.cpp)

if (WIN32)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
else()
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl)
endif()
endif()
endif()

Expand Down
26 changes: 26 additions & 0 deletions README-sycl.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,29 @@ For iGPU, please make sure the shared memory from host memory is enough. For lla

For dGPU, please make sure the device memory is enough. For llama-2-7b.Q4_0, recommend the device memory is 4GB+.

## Nvidia GPU

### Verified

|Intel GPU| Status | Verified Model|
|-|-|-|
|Ampere Series| Support| A100|

### oneMKL

The current oneMKL release does not contain the oneMKL cuBlas backend.
As a result for Nvidia GPU's oneMKL must be built from source.

```
git clone https://github.com/oneapi-src/oneMKL
cd oneMKL
mkdir build
cd build
cmake -G Ninja .. -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_CUBLAS_BACKEND=ON
ninja
// Add paths as necessary
```

## Docker

Note:
Expand Down Expand Up @@ -186,6 +209,9 @@ source /opt/intel/oneapi/setvars.sh
# Or, for FP32:
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

# For Nvidia GPUs
cmake .. -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

# Build example/main only
#cmake --build . --config Release --target main

Expand Down

0 comments on commit 1c4007c

Please sign in to comment.