diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..1381b1e63d --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# $ git config blame.ignoreRevsFile .git-blame-ignore-revs + +# Migrate code style to Black +41ccd65e2e659aa0add0e5ab59f1a46e32cc4c46 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 0000000000..be581fd2f3 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,28 @@ +# This is a workflow to format Python code with the black formatter + +name: black + +# Controls when the action will run. Triggers the workflow on push or pull request +# events but only for the master branch +on: + push: + branches: [master] + pull_request: + branches: [master] + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "black" + black: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + # Set up a Python environment for use in actions + - uses: actions/setup-python@v2 + + # Run the black code formatter + - uses: psf/black@stable diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..e15d41f23c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,75 @@ +# Changelog +All notable changes to this project will be documented in this file. + +## [Unreleased] +### Added +- Device descriptors "max_compute_units", "max_work_item_dimensions", "max_work_item_sizes", "max_work_group_size", "max_num_sub_groups" and "aspects" for int64 atomics inside the dpctl C API and inside the dpctl.SyclDevice class. + +### Removed +- The legacy OpenCL interface. + +## [0.3.6] - 2020-10-06 +### Added +- Changelog was added for dpctl. + +### Fixed +- Windows build was fixed. + +## [0.3.5] - 2020-10-06 +### Added +- Add a helper function to all Python SyclXXX classes to get the address of the base C API pointer as a long. + +### Changed +- Rename PyDPPL to dpCtl in comments (function name renaming to come later). + +### Fixed +- Fix bugs highlighted by tools. +- Various code cleanups. + +## [0.3.4] - 2020-10-05 +### Added +- Dump functions were enhanced to print back-end information. +- dpctl gained support for uint8 and unsigned long data types. +- oneAPI Beta 10 toolchain support was added. + +### Changed +- dpctl is now aware of DPC++ Sycl PI back-ends. The functionality is now exposed via the context interface. +- C API's queue manager was refactored to require a back-end. +- dpctl's device_context now requires back-end, device-type, and device-id to be provided in a string format, e.g. opencl:gpu:0. + +### Fixed +- Fixed some important bugs found by static analysis. + +## [0.3.3] - 2020-10-02 +### Added +- Add dpctl.get_current_device_type(). + +## [0.3.2] - 2020-09-29 +### Changed +- Set _cpu_device and _gpu_device to None by default. + +## [0.3.1] - 2020-09-28 +### Added +- Add get_include() and include headers. + +### Changed +- DPPL shared objects are installed into dpctl. + +### Fixed +- Refactor unit tests. + +## [0.3.0] - 2020-09-23 +### Added +- Add C and Cython APIs for portions of the Sycl queue, device, and context interfaces. +- Implement USM memory management. + +### Changed +- Refactor the API to expose a minimal sycl::queue interface.
+- Convert cpu_queues, gpu_queues, and active_queues into functions. +- Change static vectors to static pointers to vectors; this avoids calling the destructors, which were otherwise invoked in an undefined order. +- Rename package PyDPPL to dpCtl. +- Use dpcpp.exe on Windows instead of dpcpp-cl.exe, which was removed in oneAPI beta08. + +### Fixed +- Correct the use of ERRORLEVEL in conda scripts for Windows. +- Fix using dppl.has_sycl_platforms() and dppl.has_gpu_queues() functions in skipIf. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ee9a161273..c3076bd5e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,10 @@ -Diptorup Deb +# Python code style -Todd Anderson \ No newline at end of file +## black + +We use the [black](https://black.readthedocs.io/en/stable/) code formatter. + +- Revision: `20.8b1` or branch `stable`. +- See configuration in `pyproject.toml`. + +Run it before each commit: `black .` diff --git a/README.md b/README.md index 1891b80c33..fd54be2f92 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + What? ==== -A lightweight Python package exposing a subset of OpenCL and SYCL -functionalities. +A lightweight Python package exposing a subset of SYCL functionalities. Requirements ============ @@ -27,14 +28,17 @@ On Windows to cope with [long file names](https://github.com/IntelPython/dpctl/i ```cmd conda build --croot=C:/tmp conda-recipe ``` + +:warning: **You may face issues with conda-build=3.20**: use conda-build=3.18! + 3. Install conda package ```bash conda install dpctl ``` -Using PyDPPL -============ -PyDPPL relies on SYCL runtime. With Intel oneAPI installed you should activate it. +Using dpCtl +=========== +dpCtl relies on the DPC++ runtime. With Intel oneAPI installed you should activate it.
On Windows: ```cmd diff --git a/backends/CMakeLists.txt b/backends/CMakeLists.txt index 373f0c8f2b..a8d02185f2 100644 --- a/backends/CMakeLists.txt +++ b/backends/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.3.2 FATAL_ERROR) -project("PyDPPL - A lightweight SYCL wrapper for Python") +project("dpCtl - A lightweight SYCL wrapper for Python") # The function checks is DPCPP_ROOT is valid and points to a dpcpp installation function (check_for_dpcpp) @@ -76,23 +76,14 @@ if(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++17") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -DDEBUG ") elseif(UNIX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \ - -Wall -Wextra -Winit-self -Wuninitialized -Wmissing-declarations \ - -fdiagnostics-color=auto -O3 \ - ") + set(SDL_FLAGS "-fstack-protector -fstack-protector-all -fpic -fPIC -D_FORTIFY_SOURCE=2 -Wformat -Wformat-security -fno-strict-overflow -fno-delete-null-pointer-checks") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SDL_FLAGS} -Wall -Wextra -Winit-self -Wuninitialized -Wmissing-declarations -fdiagnostics-color=auto") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -DDEBUG ") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fsycl") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SDL_FLAGS} -std=c++17 -fsycl") else() message(FATAL_ERROR "Unsupported system.") endif() - -set(OpenCL_INCLUDE_DIR "${DPCPP_ROOT}/include/sycl") -set(OpenCL_LIBRARY "${DPCPP_ROOT}/lib/libOpenCL.so") - -message(STATUS "OpenCL_INCLUDE_DIR: ${OpenCL_INCLUDE_DIR}") -message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") - add_library( DPPLSyclInterface SHARED @@ -108,13 +99,6 @@ add_library( source/dppl_utils.cpp ) -# Install DPPLOpenCLInterface -add_library( - DPPLOpenCLInterface - SHARED - source/dppl_opencl_interface.c -) - # Install DPPLSyclInterface target_include_directories( DPPLSyclInterface @@ -124,18 +108,6 @@ target_include_directories( ${NUMPY_INCLUDE_DIR} ) -target_include_directories( - DPPLOpenCLInterface - PRIVATE - ${CMAKE_SOURCE_DIR}/include/ -) - -target_include_directories( - DPPLOpenCLInterface - PUBLIC - ${OpenCL_INCLUDE_DIR} -) - if(WIN32) message( STATUS @@ -152,10 +124,6 @@ if(WIN32) PRIVATE ${DPCPP_ROOT}/lib/sycl.lib PRIVATE ${DPCPP_ROOT}/lib/OpenCL.lib ) - target_link_libraries( - DPPLOpenCLInterface - PRIVATE ${DPCPP_ROOT}/lib/OpenCL.lib - ) endif() install( @@ -165,14 +133,6 @@ install( "${CMAKE_INSTALL_PREFIX}/lib/" ) -install( - TARGETS - DPPLOpenCLInterface - LIBRARY - DESTINATION - "${CMAKE_INSTALL_PREFIX}/lib/" -) - # Install all headers file(GLOB HEADERS "${CMAKE_SOURCE_DIR}/include/*.h*") foreach(HEADER ${HEADERS}) @@ -185,7 +145,7 @@ foreach(HEADER ${HEADERS}) install(FILES "${HEADER}" DESTINATION include/Support) endforeach() -# Enable to build the PyDPPL backend test cases +# Enable to build the dpCtl backend test cases add_subdirectory(tests) # Todo : Add build rules for doxygen diff --git a/backends/include/Support/CBindingWrapping.h b/backends/include/Support/CBindingWrapping.h index 40ef4027b2..4c77612269 100644 --- a/backends/include/Support/CBindingWrapping.h +++ b/backends/include/Support/CBindingWrapping.h @@ -1,6 +1,6 @@ //===----- Support/CBindingWrapping.h - DPPL-SYCL interface --*-- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/DllExport.h b/backends/include/Support/DllExport.h index 28643f3195..de82311320 100644 --- a/backends/include/Support/DllExport.h +++ 
b/backends/include/Support/DllExport.h @@ -1,6 +1,6 @@ //===---------- Support/DllExport.h - DPPL-SYCL interface ---*--- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/ExternC.h b/backends/include/Support/ExternC.h index 60b91de0dd..76baef7871 100644 --- a/backends/include/Support/ExternC.h +++ b/backends/include/Support/ExternC.h @@ -1,6 +1,6 @@ //===------------ Support/ExternC.h - DPPL-SYCL interface ---*--- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/MemOwnershipAttrs.h b/backends/include/Support/MemOwnershipAttrs.h index 6f340036ac..f0579fdd46 100644 --- a/backends/include/Support/MemOwnershipAttrs.h +++ b/backends/include/Support/MemOwnershipAttrs.h @@ -1,6 +1,6 @@ //===----- dppl_mem_ownership_attrs.h - DPPL-SYCL interface --*-- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_data_types.h b/backends/include/dppl_data_types.h index 3da14eb5b5..4427b3f893 100644 --- a/backends/include/dppl_data_types.h +++ b/backends/include/dppl_data_types.h @@ -1,6 +1,6 @@ -//===---------- dppl_data_types.h - DPPL-SYCL interface ----*---- C ---*---===// +//===------------------ dppl_data_types.h - dpctl-C_API ----*---- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_opencl_interface.h b/backends/include/dppl_opencl_interface.h deleted file mode 100644 index 52e03ed7de..0000000000 --- a/backends/include/dppl_opencl_interface.h +++ /dev/null @@ -1,312 +0,0 @@ -//===-- dppl_opencl_interface.h - DPPL-OpenCL interface -------*- C -*-----===// -// -// Python Data Parallel Processing Library (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the declaration of a C API to expose a lightweight OpenCL -/// interface for the Python dpctl package. -/// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - - -#ifdef _WIN32 -# ifdef DPPLOpenCLInterface_EXPORTS -# define DPPL_API __declspec(dllexport) -# else -# define DPPL_API __declspec(dllimport) -# endif -#else -# define DPPL_API -#endif - - -enum DP_GLUE_ERROR_CODES -{ - DP_GLUE_SUCCESS = 0, - DP_GLUE_FAILURE = -1 -}; - - -/*! 
- * - */ -struct dp_env -{ - unsigned id_; - // TODO : Add members to store more device related information such as name - void *context; - void *device; - void *queue; - unsigned int max_work_item_dims; - size_t max_work_group_size; - int support_int64_atomics; - int support_float64_atomics; - int (*dump_fn) (void *); -}; - -typedef struct dp_env* env_t; - - -struct dp_buffer -{ - unsigned id_; - // This may, for example, be a cl_mem pointer - void *buffer_ptr; - // Stores the size of the buffer_ptr (e.g sizeof(cl_mem)) - size_t sizeof_buffer_ptr; -}; - -typedef struct dp_buffer* buffer_t; - - -struct dp_kernel -{ - unsigned id_; - void *kernel; - int (*dump_fn) (void *); -}; - -typedef struct dp_kernel* kernel_t; - - -struct dp_program -{ - unsigned id_; - void *program; -}; - -typedef struct dp_program* program_t; - - -struct dp_kernel_arg -{ - unsigned id_; - const void *arg_value; - size_t arg_size; -}; - -typedef struct dp_kernel_arg* kernel_arg_t; - - -/*! @struct dp_runtime_t - * @brief Stores an array of the available OpenCL or Level-0 platform/drivers. - * - * @var dp_runtime_t::num_platforms - * Depicts the number of platforms/drivers available on this system - * - * @var dp_runtime_t::platforms_ids - * An array of OpenCL platforms. - * - */ -struct dp_runtime -{ - unsigned id_; - unsigned num_platforms; - void *platform_ids; - bool has_cpu; - bool has_gpu; - env_t first_cpu_env; - env_t first_gpu_env; - env_t curr_env; - int (*dump_fn) (void *); -}; - -typedef struct dp_runtime* runtime_t; - -DPPL_API -int set_curr_env (runtime_t rt, env_t env); - -/*! - * @brief Initializes a new dp_runtime_t object - * - * @param[in/out] rt - An uninitialized runtime_t pointer that is initialized - * by the function. - * - * @return An error code indicating if the runtime_t object was successfully - * initialized. - */ -DPPL_API -int create_dp_runtime (runtime_t *rt); - - -/*! - * @brief Free the runtime and all its resources. - * - * @param[in] rt - Pointer to the numba_one_api_runtime_t object to be freed - * - * @return An error code indicating if resource freeing was successful. - */ -DPPL_API -int destroy_dp_runtime (runtime_t *rt); - - -/*! - * - */ -DPPL_API -int create_dp_rw_mem_buffer (env_t env_t_ptr, size_t buffsize, buffer_t *buff); - - -DPPL_API -int destroy_dp_rw_mem_buffer (buffer_t *buff); - - -/*! - * - */ -DPPL_API -int write_dp_mem_buffer_to_device (env_t env_t_ptr, - buffer_t buff, - bool blocking_copy, - size_t offset, - size_t buffersize, - const void *data_ptr); - - -/*! - * - */ -DPPL_API -int read_dp_mem_buffer_from_device (env_t env_t_ptr, - buffer_t buff, - bool blocking_copy, - size_t offset, - size_t buffersize, - void *data_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_program_from_spirv (env_t env_t_ptr, - const void *il, - size_t length, - program_t *program_t_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_program_from_source (env_t env_t_ptr, - unsigned int count, - const char **strings, - const size_t *lengths, - program_t *program_t_ptr); - -/*! - * - */ -DPPL_API -int destroy_dp_program (program_t *program_t_ptr); - - -DPPL_API -int build_dp_program (env_t env_t_ptr, program_t program_t_ptr); - -/*! - * - */ -DPPL_API -int create_dp_kernel (env_t env_t_ptr, - program_t program_ptr, - const char *kernel_name, - kernel_t *kernel_ptr); - - -DPPL_API -int destroy_dp_kernel (kernel_t *kernel_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_kernel_arg (const void *arg_value, - size_t arg_size, - kernel_arg_t *kernel_arg_t_ptr); - - -/*! 
- * - */ -DPPL_API -int create_dp_kernel_arg_from_buffer (buffer_t *buffer_t_ptr, - kernel_arg_t *kernel_arg_t_ptr); - - -/*! - * - */ -DPPL_API -int destroy_dp_kernel_arg (kernel_arg_t *kernel_arg_t_ptr); - - -/*! - * - */ -DPPL_API -int set_args_and_enqueue_dp_kernel (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size); - - -/*! - * - */ -DPPL_API -int set_args_and_enqueue_dp_kernel_auto_blocking (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int num_dims, - size_t *dim_starts, - size_t *dim_stops); - - -/*! - * - */ -DPPL_API -int retain_dp_context (env_t env_t_ptr); - - -/*! - * - */ -DPPL_API -int release_dp_context (env_t env_t_ptr); - - -//---- TODO: - -// 1. Add release/retain methods for buffers - -//--------- diff --git a/backends/include/dppl_sycl_context_interface.h b/backends/include/dppl_sycl_context_interface.h index 39905f6267..bb0226225f 100644 --- a/backends/include/dppl_sycl_context_interface.h +++ b/backends/include/dppl_sycl_context_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_context_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===----------- dppl_sycl_context_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -57,13 +57,14 @@ DPPL_API bool DPPLContext_IsHost (__dppl_keep const DPPLSyclContextRef CtxRef); /*! - * @brief + * @brief Returns the sycl backend for the DPPLSyclContextRef pointer. * * @param CtxRef An opaque pointer to a sycl::context. - * @return {return} My Param doc + * @return The sycl backend for the DPPLSyclContextRef returned as + * a DPPLSyclBackendType enum type. */ DPPL_API -DPPLSyclBEType +DPPLSyclBackendType DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef); /*! diff --git a/backends/include/dppl_sycl_device_interface.h b/backends/include/dppl_sycl_device_interface.h index f1619ffcae..f677c642fc 100644 --- a/backends/include/dppl_sycl_device_interface.h +++ b/backends/include/dppl_sycl_device_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_device_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===---------- dppl_sycl_device_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -110,10 +110,10 @@ DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef); */ DPPL_API uint32_t -DPPLDevice_GetMaxComputeUnites (__dppl_keep const DPPLSyclDeviceRef DRef); +DPPLDevice_GetMaxComputeUnits (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). * * @param DRef Opaque pointer to a sycl::device * @return Returns the valid result if device exists else returns 0. @@ -123,17 +123,17 @@ uint32_t DPPLDevice_GetMaxWorkItemDims (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). * * @param DRef Opaque pointer to a sycl::device - * @return Returns the valid result if device exists else returns Null. + * @return Returns the valid result if device exists else returns NULL. */ DPPL_API __dppl_keep size_t* DPPLDevice_GetMaxWorkItemSizes (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). 
* * @param DRef Opaque pointer to a sycl::device * @return Returns the valid result if device exists else returns 0. @@ -152,6 +152,26 @@ DPPL_API uint32_t DPPLDevice_GetMaxNumSubGroups (__dppl_keep const DPPLSyclDeviceRef DRef); +/*! + * @brief Wrapper over device.get_info. + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns true if device has int64_base_atomics else returns false. + */ +DPPL_API +bool +DPPLDevice_HasInt64BaseAtomics (__dppl_keep const DPPLSyclDeviceRef DRef); + +/*! + * @brief Wrapper over device.get_info. + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns true if device has int64_extended_atomics else returns false. + */ +DPPL_API +bool +DPPLDevice_HasInt64ExtendedAtomics (__dppl_keep const DPPLSyclDeviceRef DRef); + /*! * @brief Returns a C string for the device name. * diff --git a/backends/include/dppl_sycl_enum_types.h b/backends/include/dppl_sycl_enum_types.h index 2a787f995d..0d8d73f091 100644 --- a/backends/include/dppl_sycl_enum_types.h +++ b/backends/include/dppl_sycl_enum_types.h @@ -35,7 +35,7 @@ DPPL_C_EXTERN_C_BEGIN * @brief Redefinition of DPC++-specific Sycl backend types. * */ -enum DPPLSyclBEType +enum DPPLSyclBackendType { DPPL_UNKNOWN_BACKEND = 0x0, DPPL_OPENCL = 1 << 16, @@ -57,7 +57,7 @@ enum DPPLSyclDeviceType DPPL_AUTOMATIC = 1 << 4, DPPL_HOST_DEVICE = 1 << 5, DPPL_ALL = 1 << 6 - // IMP: before adding new values here look at DPPLSyclBEType enum. The + // IMP: before adding new values here look at DPPLSyclBackendType enum. The // values should not overlap. }; @@ -76,6 +76,7 @@ typedef enum DPPL_SHORT, DPPL_INT, DPPL_UNSIGNED_INT, + DPPL_UNSIGNED_INT8, DPPL_LONG, DPPL_UNSIGNED_LONG, DPPL_LONG_LONG, diff --git a/backends/include/dppl_sycl_event_interface.h b/backends/include/dppl_sycl_event_interface.h index c97eaf08f3..98bc9dae08 100644 --- a/backends/include/dppl_sycl_event_interface.h +++ b/backends/include/dppl_sycl_event_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_event_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===----------- dppl_sycl_event_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_kernel_interface.h b/backends/include/dppl_sycl_kernel_interface.h index 64d2f97a30..a48064cf83 100644 --- a/backends/include/dppl_sycl_kernel_interface.h +++ b/backends/include/dppl_sycl_kernel_interface.h @@ -1,6 +1,6 @@ -//===---- dppl_sycl_kernel_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===------------ dppl_sycl_kernel_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_platform_interface.h b/backends/include/dppl_sycl_platform_interface.h index 334bd08fcf..b3df1b5c56 100644 --- a/backends/include/dppl_sycl_platform_interface.h +++ b/backends/include/dppl_sycl_platform_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_platform_interface.h - DPPL-SYCL interface ---*--C++ -*-===// +//===----------- dppl_sycl_platform_interface.h - dpctl-C_API ---*--C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -34,37 +34,38 @@ DPPL_C_EXTERN_C_BEGIN /*! - * @brief Returns the number of sycl::platform available on the system. 
+ * @brief Returns the number of non-host type sycl::platform available on the + * system. * * @return The number of available sycl::platforms. */ DPPL_API -size_t DPPLPlatform_GetNumPlatforms (); +size_t DPPLPlatform_GetNumNonHostPlatforms (); /*! - * @brief Returns the number of unique sycl backends on the system not counting - * the host backend. + * @brief Returns the number of unique non-host sycl backends on the system. * * @return The number of unique sycl backends. */ DPPL_API -size_t DPPLPlatform_GetNumBackends (); +size_t DPPLPlatform_GetNumNonHostBackends (); /*! - * @brief Returns an array of the unique DPPLSyclBEType values on the system. + * @brief Returns an array of the unique non-host DPPLSyclBackendType values on + * the system. * - * @return An array of DPPLSyclBEType enum values. + * @return An array of DPPLSyclBackendType enum values. */ DPPL_API -__dppl_give enum DPPLSyclBEType* DPPLPlatform_GetListOfBackends (); +__dppl_give DPPLSyclBackendType* DPPLPlatform_GetListOfNonHostBackends (); /*! - * @brief Frees an array of DPPLSyclBEType enum values. + * @brief Frees an array of DPPLSyclBackendType enum values. * - * @param BEArr An array of DPPLSyclBEType enum values to be freed. + * @param BEArr An array of DPPLSyclBackendType enum values to be freed. */ DPPL_API -void DPPLPlatform_DeleteListOfBackends (__dppl_take enum DPPLSyclBEType* BEArr); +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType* BEArr); /*! * @brief Prints out some selected info about all sycl::platform on the system. diff --git a/backends/include/dppl_sycl_program_interface.h b/backends/include/dppl_sycl_program_interface.h index 952156ff79..28056f49bc 100644 --- a/backends/include/dppl_sycl_program_interface.h +++ b/backends/include/dppl_sycl_program_interface.h @@ -1,6 +1,6 @@ -//===---- dppl_sycl_program_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===----------- dppl_sycl_program_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_queue_interface.h b/backends/include/dppl_sycl_queue_interface.h index 2fc9523bc9..5ba2011907 100644 --- a/backends/include/dppl_sycl_queue_interface.h +++ b/backends/include/dppl_sycl_queue_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===----------- dppl_sycl_queue_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -59,10 +59,11 @@ bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1, * @brief Returns the Sycl backend for the provided sycl::queue. * * @param QRef An opaque pointer to the sycl queue. - * @return A enum DPPLSyclBEType corresponding to the backed for the queue. + * @return A enum DPPLSyclBackendType corresponding to the backed for the + * queue. */ DPPL_API -enum DPPLSyclBEType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef); +DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef); /*! * @brief Returns the Sycl context for the queue. 
diff --git a/backends/include/dppl_sycl_queue_manager.h b/backends/include/dppl_sycl_queue_manager.h index 3afa2d3f5a..73822146d4 100644 --- a/backends/include/dppl_sycl_queue_manager.h +++ b/backends/include/dppl_sycl_queue_manager.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_manager.h - DPPL-SYCL interface ---*---C++ ---*---===// +//===----------- dppl_sycl_queue_manager.h - dpctl-C_API ---*---C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -65,8 +65,8 @@ __dppl_give DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue (); */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! @@ -78,7 +78,6 @@ DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, DPPL_API size_t DPPLQueueMgr_GetNumActivatedQueues (); - /*! * @brief Get the number of available queues for given backend and device type * combination. @@ -88,8 +87,8 @@ size_t DPPLQueueMgr_GetNumActivatedQueues (); * @return The number of available queues. */ DPPL_API -size_t DPPLQueueMgr_GetNumQueues (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy); +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy); /*! * @brief Returns True if the passed in queue and the current queue are the @@ -115,8 +114,8 @@ bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! @@ -141,8 +140,8 @@ DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! 
diff --git a/backends/include/dppl_sycl_types.h b/backends/include/dppl_sycl_types.h index ce1f74dd85..74f2792f7a 100644 --- a/backends/include/dppl_sycl_types.h +++ b/backends/include/dppl_sycl_types.h @@ -1,6 +1,6 @@ -//===---------- dppl_sycl_types.h - DPPL-SYCL interface ---*--- C++ ---*---===// +//===-------------- dppl_sycl_types.h - dpctl-C_API ----*---- C++ ----*----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_usm_interface.h b/backends/include/dppl_sycl_usm_interface.h index 6a52fcff42..6b771d7c2d 100644 --- a/backends/include/dppl_sycl_usm_interface.h +++ b/backends/include/dppl_sycl_usm_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_usm_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===------------- dppl_sycl_usm_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -84,6 +84,6 @@ void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef, DPPL_API const char * DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef, - __dppl_keep const DPPLSyclContextRef СRef); + __dppl_keep const DPPLSyclContextRef CRef); DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_utils.h b/backends/include/dppl_utils.h index be523622ba..b0578173af 100644 --- a/backends/include/dppl_utils.h +++ b/backends/include/dppl_utils.h @@ -1,6 +1,6 @@ -//===------------- dppl_utils.h - DPPL-SYCL interface --*-- C++ -----*-----===// +//===------------------- dppl_utils.h - dpctl-C_API ---*--- C++ -----*-----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/error_check_macros.h b/backends/include/error_check_macros.h deleted file mode 100644 index 77bd462e5a..0000000000 --- a/backends/include/error_check_macros.h +++ /dev/null @@ -1,111 +0,0 @@ -//===----- error_check_macros.h - DPPL-OpenCL interface -------*- C -*-----===// -// -// Python Data Parallel Processing Python (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains a set of macros to check for different OpenCL error -/// codes. -/// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -// TODO : Add branches to check for OpenCL error codes and print relevant error -// messages. Then there would be no need to pass in the message string - -// FIXME : The error check macro needs to be improved. Currently, we encounter -// an error and goto the error label. Directly going to the error label can lead -// to us not releasing resources prior to returning from the function. 
To work -// around this situation, add a stack to store all the objects that should be -// released prior to returning. The stack gets populated as a function executes -// and on encountering an error, all objects on the stack get properly released -// prior to returning. (Look at enqueue_dp_kernel_from_source for a -// ghastly example where we really need proper resource management.) - -// FIXME : memory allocated in a function should be released in the error -// section - -#define CHECK_OPEN_CL_ERROR(x, M) do { \ - int retval = (x); \ - switch(retval) { \ - case 0: \ - break; \ - case -36: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_COMMAND_QUEUE]command_queue is not a " \ - "valid command-queue.", \ - __LINE__, __FILE__); \ - goto error; \ - case -38: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n" \ - "%s\n", \ - retval, "[CL_INVALID_MEM_OBJECT] memory object is not a " \ - "valid OpenCL memory object.", \ - __LINE__, __FILE__,M); \ - goto error; \ - case -45: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_PROGRAM_EXECUTABLE] no successfully " \ - "built program executable available for device " \ - "associated with command_queue.", \ - __LINE__, __FILE__); \ - goto error; \ - case -54: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_WORK_GROUP_SIZE]", \ - __LINE__, __FILE__); \ - goto error; \ - default: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - } \ -} while(0) - - -#define CHECK_MALLOC_ERROR(type, x) do { \ - type * ptr = (type*)(x); \ - if(ptr == NULL) { \ - fprintf(stderr, "Malloc Error for type %s on Line %d in %s", \ - #type, __LINE__, __FILE__); \ - perror(" "); \ - free(ptr); \ - ptr = NULL; \ - goto malloc_error; \ - } \ -} while(0) - - -#define CHECK_DPGLUE_ERROR(x, M) do { \ - int retval = (x); \ - switch(retval) { \ - case 0: \ - break; \ - case -1: \ - fprintf(stderr, "DP_Glue Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - default: \ - fprintf(stderr, "DP_Glue Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - } \ -} while(0) diff --git a/backends/source/dppl_opencl_interface.c b/backends/source/dppl_opencl_interface.c deleted file mode 100644 index 64b0de33cf..0000000000 --- a/backends/source/dppl_opencl_interface.c +++ /dev/null @@ -1,1166 +0,0 @@ -//===------ dppl_opencl_interface.c - DPPL-OpenCL interface ----*- C -*----===// -// -// Python Data Parallel Processing Library (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the data types and functions declared in -/// dppl_opencl_interface.h. 
-/// -//===----------------------------------------------------------------------===// -#include "dppl_opencl_interface.h" -#include "error_check_macros.h" -#include -#include -#include /* OpenCL headers */ - -/*------------------------------- Magic numbers ------------------------------*/ - -#define RUNTIME_ID 0x6dd5e8c8 -#define ENV_ID 0x6c78fd87 -#define BUFFER_ID 0xc55c47b1 -#define KERNEL_ID 0x032dc08e -#define PROGRAM_ID 0xc3842d12 -#define KERNELARG_ID 0xd42f630f - -#if DEBUG - -static void check_runtime_id (runtime_t x) -{ - assert(x->id_ == RUNTIME_ID); -} - -static void check_env_id (env_t x) -{ - assert(x->id_ == ENV_ID); -} - -static void check_buffer_id (buffer_t x) -{ - assert(x->id_ == BUFFER_ID); -} - -static void check_kernel_id (kernel_t x) -{ - assert(x->id_ == KERNEL_ID); -} - -static void check_program_id (program_t x) -{ - assert(x->id_ == PROGRAM_ID); -} - -static void check_kernelarg_id (kernel_arg_t x) -{ - assert(x->id_ == KERNELARG_ID); -} - -#endif - -/*------------------------------- Private helpers ----------------------------*/ - - -static int get_platform_name (cl_platform_id platform, char **platform_name) -{ - cl_int err; - size_t n; - - err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, *platform_name, &n); - CHECK_OPEN_CL_ERROR(err, "Could not get platform name length."); - - // Allocate memory for the platform name string - *platform_name = (char*)malloc(sizeof(char)*n); - CHECK_MALLOC_ERROR(char*, *platform_name); - - err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, n, *platform_name, - NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get platform name."); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(*platform_name); - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -static int dump_device_info (void *obj) -{ - cl_int err; - char *value; - size_t size; - cl_uint maxComputeUnits; - env_t env_t_ptr; - - value = NULL; - env_t_ptr = (env_t)obj; - cl_device_id device = (cl_device_id)(env_t_ptr->device); - - err = clRetainDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not retain device."); - - // print device name - err = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get device name."); - value = (char*)malloc(size); - err = clGetDeviceInfo(device, CL_DEVICE_NAME, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get device name."); - printf("Device: %s\n", value); - free(value); - - // print hardware device version - err = clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get device version."); - value = (char*) malloc(size); - err = clGetDeviceInfo(device, CL_DEVICE_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get device version."); - printf("Hardware version: %s\n", value); - free(value); - - // print software driver version - clGetDeviceInfo(device, CL_DRIVER_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get driver version."); - value = (char*) malloc(size); - clGetDeviceInfo(device, CL_DRIVER_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get driver version."); - printf("Software version: %s\n", value); - free(value); - - // print c version supported by compiler for device - clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get open cl version."); - value = (char*) malloc(size); - clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get 
open cl version."); - printf("OpenCL C version: %s\n", value); - free(value); - - // print parallel compute units - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(maxComputeUnits), &maxComputeUnits, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get number of compute units."); - printf("Parallel compute units: %d\n", maxComputeUnits); - - err = clReleaseDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not release device."); - - return DP_GLUE_SUCCESS; - -error: - free(value); - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Helper function to print out information about the platform and - * devices available to this runtime. - * - */ -static int dump_dp_runtime_info (void *obj) -{ - size_t i; - runtime_t rt; - - rt = (runtime_t)obj; -#if DEBUG - check_runtime_id(rt); -#endif - if(rt) { - printf("Number of platforms : %d\n", rt->num_platforms); - cl_platform_id *platforms = rt->platform_ids; - for(i = 0; i < rt->num_platforms; ++i) { - char *platform_name = NULL; - get_platform_name(platforms[i], &platform_name); - printf("Platform #%zu: %s\n", i, platform_name); - free(platform_name); - } - } - - return DP_GLUE_SUCCESS; -} - - -/*! - * - */ -static int dump_dp_kernel_info (void *obj) -{ - cl_int err; - char *value; - size_t size; - cl_uint numKernelArgs; - cl_kernel kernel; - kernel_t kernel_t_ptr; - - value = NULL; - kernel_t_ptr = (kernel_t)obj; -#if DEBUG - check_kernel_id(kernel_t_ptr); -#endif - kernel = (cl_kernel)(kernel_t_ptr->kernel); - - // print kernel function name - err = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel function name size."); - value = (char*)malloc(size); - err = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel function name."); - printf("Kernel Function name: %s\n", value); - free(value); - - // print the number of kernel args - err = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numKernelArgs), - &numKernelArgs, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel num args."); - printf("Number of kernel arguments : %d\n", numKernelArgs); - - return DP_GLUE_SUCCESS; - -error: - free(value); - return DP_GLUE_FAILURE; -} - - -/*! 
- * - */ -static int get_first_device (cl_platform_id* platforms, - cl_uint platformCount, - cl_device_id *device, - cl_device_type device_ty) -{ - cl_int status; - cl_uint ndevices = 0; - unsigned int i; - - for (i = 0; i < platformCount; ++i) { - // get all devices of env_ty - status = clGetDeviceIDs(platforms[i], device_ty, 0, NULL, &ndevices); - // If this platform has no devices of this type then continue - if(!ndevices) continue; - - // get the first device - status = clGetDeviceIDs(platforms[i], device_ty, 1, device, NULL); - CHECK_OPEN_CL_ERROR(status, "Could not get first cl_device_id."); - - // If the first device of this type was discovered, no need to look more - if(ndevices) break; - } - - if(ndevices) - return DP_GLUE_SUCCESS; - else - return DP_GLUE_FAILURE; - -error: - return DP_GLUE_FAILURE; -} - -static int support_int64_atomics(cl_device_id *device) -{ - - cl_int err; - size_t size; - char *value; - - err = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 0, NULL, &size); - if (err != CL_SUCCESS ) { - printf("Unable to obtain device info for param\n"); - return DP_GLUE_FAILURE; - } - value = (char*) malloc(size); - clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, size, value, NULL); - - if(strstr(value, "cl_khr_int64_base_atomics") != NULL) { - return DP_GLUE_SUCCESS; - } else { - return DP_GLUE_FAILURE; - } -} - -static int support_float64_atomics(cl_device_id *device) -{ - - cl_int err; - size_t size; - char *value; - - err = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 0, NULL, &size); - if (err != CL_SUCCESS ) { - printf("Unable to obtain device info for param\n"); - return DP_GLUE_FAILURE; - } - value = (char*) malloc(size); - clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, size, value, NULL); - - if(strstr(value, "cl_khr_fp64") != NULL) { - return DP_GLUE_SUCCESS; - } else { - return DP_GLUE_FAILURE; - } -} - -/*! 
- * - */ -static int create_dp_env_t (cl_platform_id* platforms, - size_t nplatforms, - cl_device_type device_ty, - env_t *env_t_ptr) -{ - cl_int err; - int err1; - env_t env; - cl_device_id *device; - - env = NULL; - device = NULL; - - // Allocate the env_t object - env = (env_t)malloc(sizeof(struct dp_env)); - CHECK_MALLOC_ERROR(env_t, env); - env->id_ = ENV_ID; - - env->context = NULL; - env->device = NULL; - env->queue = NULL; - env->max_work_item_dims = 0; - env->max_work_group_size = 0; - env->dump_fn = NULL; - - device = (cl_device_id*)malloc(sizeof(cl_device_id)); - - err1 = get_first_device(platforms, nplatforms, device, device_ty); - CHECK_DPGLUE_ERROR(err1, "Failed inside get_first_device"); - - // get the CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS for this device - err = clGetDeviceInfo(*device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - sizeof(env->max_work_item_dims), &env->max_work_item_dims, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get max work item dims"); - - // get the CL_DEVICE_MAX_WORK_GROUP_SIZE for this device - err = clGetDeviceInfo(*device, CL_DEVICE_MAX_WORK_GROUP_SIZE, - sizeof(env->max_work_group_size), &env->max_work_group_size, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get max work group size"); - - // Create a context and associate it with device - env->context = clCreateContext(NULL, 1, device, NULL, NULL, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create device context."); - // Create a queue and associate it with the context - env->queue = clCreateCommandQueueWithProperties((cl_context)env->context, - *device, 0, &err); - - CHECK_OPEN_CL_ERROR(err, "Could not create command queue."); - - env->device = *device; - env ->dump_fn = dump_device_info; - - if (DP_GLUE_SUCCESS == support_int64_atomics(device)) { - env->support_int64_atomics = 1; - } else { - env->support_int64_atomics = 0; - } - - if (DP_GLUE_SUCCESS == support_float64_atomics(device)) { - env->support_float64_atomics = 1; - } else { - env->support_float64_atomics = 0; - } - - free(device); - *env_t_ptr = env; - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(env); - *env_t_ptr = NULL; - return DP_GLUE_FAILURE; -} - - -static int destroy_dp_env_t (env_t *env_t_ptr) -{ - cl_int err; -#if DEBUG - check_env_id(*env_t_ptr); -#endif - err = clReleaseCommandQueue((cl_command_queue)(*env_t_ptr)->queue); - CHECK_OPEN_CL_ERROR(err, "Could not release command queue."); - err = clReleaseDevice((cl_device_id)(*env_t_ptr)->device); - CHECK_OPEN_CL_ERROR(err, "Could not release device."); - err = clReleaseContext((cl_context)(*env_t_ptr)->context); - CHECK_OPEN_CL_ERROR(err, "Could not release context."); - - free(*env_t_ptr); - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Initialize the runtime object. 
- */ -static int init_runtime_t_obj (runtime_t rt) -{ - cl_int status; - int ret; - cl_platform_id *platforms; -#if DEBUG - check_runtime_id(rt); -#endif - // get count of available platforms - status = clGetPlatformIDs(0, NULL, &(rt->num_platforms)); - CHECK_OPEN_CL_ERROR(status, "Could not get platform count."); - - if(!rt->num_platforms) { - fprintf(stderr, "No OpenCL platforms found.\n"); - goto error; - } - - // Allocate memory for the platforms array - rt->platform_ids = (cl_platform_id*)malloc( - sizeof(cl_platform_id)*rt->num_platforms - ); - CHECK_MALLOC_ERROR(cl_platform_id, rt->platform_ids); - - // Get the platforms - status = clGetPlatformIDs(rt->num_platforms, rt->platform_ids, NULL); - CHECK_OPEN_CL_ERROR(status, "Could not get platform ids"); - // Cast rt->platforms to a pointer of type cl_platform_id, as we cannot do - // pointer arithmetic on void*. - platforms = (cl_platform_id*)rt->platform_ids; - // Get the first cpu device on this platform - ret = create_dp_env_t(platforms, rt->num_platforms, - CL_DEVICE_TYPE_CPU, &rt->first_cpu_env); - rt->has_cpu = !ret; - -#if DEBUG - if(rt->has_cpu) - printf("DEBUG: CPU device acquired...\n"); - else - printf("DEBUG: No CPU available on the system\n"); -#endif - - // Get the first gpu device on this platform - ret = create_dp_env_t(platforms, rt->num_platforms, - CL_DEVICE_TYPE_GPU, &rt->first_gpu_env); - rt->has_gpu = !ret; - -#if DEBUG - if(rt->has_gpu) - printf("DEBUG: GPU device acquired...\n"); - else - printf("DEBUG: No GPU available on the system.\n"); -#endif - - if(rt->has_gpu) - rt->curr_env = rt->first_gpu_env; - else if(rt->has_cpu) - rt->curr_env = rt->first_cpu_env; - else - goto error; - - return DP_GLUE_SUCCESS; - -malloc_error: - - return DP_GLUE_FAILURE; -error: - free(rt->platform_ids); - - return DP_GLUE_FAILURE; -} - -/*-------------------------- End of private helpers --------------------------*/ - -int set_curr_env (runtime_t rt, env_t env) -{ - if(env && rt) { - rt->curr_env = env; - return DP_GLUE_SUCCESS; - } - return DP_GLUE_FAILURE; -} - -/*! - * @brief Initializes a new dp_runtime_t object - * - */ -int create_dp_runtime (runtime_t *rt) -{ - int err; - runtime_t rtobj; - - rtobj = NULL; - // Allocate a new struct dp_runtime object - rtobj = (runtime_t)malloc(sizeof(struct dp_runtime)); - CHECK_MALLOC_ERROR(runtime_t, rt); - - rtobj->id_ = RUNTIME_ID; - rtobj->num_platforms = 0; - rtobj->platform_ids = NULL; - err = init_runtime_t_obj(rtobj); - CHECK_DPGLUE_ERROR(err, "Could not initialize runtime object."); - rtobj->dump_fn = dump_dp_runtime_info; - - *rt = rtobj; -#if DEBUG - printf("DEBUG: Created an new dp_runtime object\n"); -#endif - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(rtobj); - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Free the runtime and all its resources. 
- * - */ -int destroy_dp_runtime (runtime_t *rt) -{ - int err; -#if DEBUG - check_runtime_id(*rt); -#endif - -#if DEBUG - printf("DEBUG: Going to destroy the dp_runtime object\n"); -#endif - // free the first_cpu_device - if((*rt)->first_cpu_env) { - err = destroy_dp_env_t(&(*rt)->first_cpu_env); - CHECK_DPGLUE_ERROR(err, "Could not destroy first_cpu_device."); - } - - // free the first_gpu_device - if((*rt)->first_gpu_env) { - err = destroy_dp_env_t(&(*rt)->first_gpu_env); - CHECK_DPGLUE_ERROR(err, "Could not destroy first_gpu_device."); - } - - // free the platforms - free((cl_platform_id*)(*rt)->platform_ids); - // free the runtime_t object - free(*rt); - -#if DEBUG - printf("DEBUG: Destroyed the new dp_runtime object\n"); -#endif - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int retain_dp_context (env_t env_t_ptr) -{ - cl_int err; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed when calling clRetainContext."); - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int release_dp_context (env_t env_t_ptr) -{ - cl_int err; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - context = (cl_context)(env_t_ptr->context); - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed when calling clRetainContext."); - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int create_dp_rw_mem_buffer (env_t env_t_ptr, - size_t buffsize, - buffer_t *buffer_t_ptr) -{ - cl_int err; - buffer_t buff; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - buff = NULL; - - // Get the context from the device - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed to retain context."); - - // Allocate a dp_buffer object - buff = (buffer_t)malloc(sizeof(struct dp_buffer)); - CHECK_MALLOC_ERROR(buffer_t, buffer_t_ptr); - - buff->id_ = BUFFER_ID; - - // Create the OpenCL buffer. - // NOTE : Copying of data from host to device needs to happen explicitly - // using clEnqueue[Write|Read]Buffer. This would change in the future. 
- buff->buffer_ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, buffsize, - NULL, &err); - CHECK_OPEN_CL_ERROR(err, "Failed to create CL buffer."); - - buff->sizeof_buffer_ptr = sizeof(cl_mem); -#if DEBUG - printf("DEBUG: CL RW buffer created...\n"); -#endif - *buffer_t_ptr = buff; - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed to release context."); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(buff); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_rw_mem_buffer (buffer_t *buff) -{ - cl_int err; -#if DEBUG - check_buffer_id(*buff); -#endif - err = clReleaseMemObject((cl_mem)(*buff)->buffer_ptr); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL buffer."); - free(*buff); - -#if DEBUG - printf("DEBUG: CL buffer destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -int write_dp_mem_buffer_to_device (env_t env_t_ptr, - buffer_t buffer_t_ptr, - bool blocking, - size_t offset, - size_t buffersize, - const void *data_ptr) -{ - cl_int err; - cl_command_queue queue; - cl_mem mem; -#if DEBUG - check_env_id(env_t_ptr); - check_buffer_id(buffer_t_ptr); -#endif - queue = (cl_command_queue)env_t_ptr->queue; - mem = (cl_mem)buffer_t_ptr->buffer_ptr; - -#if DEBUG - assert(mem && "buffer memory is NULL"); -#endif - - err = clRetainMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - err = clRetainCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the buffer memory object."); - - // Not using any events for the time being. Eventually we want to figure - // out the event dependencies using parfor analysis. - err = clEnqueueWriteBuffer(queue, mem, blocking?CL_TRUE:CL_FALSE, - offset, buffersize, data_ptr, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Failed to write to CL buffer."); - - err = clReleaseCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to release the command queue."); - err = clReleaseMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to release the buffer memory object."); - -#if DEBUG - printf("DEBUG: CL buffer written to device...\n"); -#endif - //--- TODO: Implement a version that uses clEnqueueMapBuffer - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int read_dp_mem_buffer_from_device (env_t env_t_ptr, - buffer_t buffer_t_ptr, - bool blocking, - size_t offset, - size_t buffersize, - void *data_ptr) -{ - cl_int err; - cl_command_queue queue; - cl_mem mem; -#if DEBUG - check_env_id(env_t_ptr); - check_buffer_id(buffer_t_ptr); -#endif - queue = (cl_command_queue)env_t_ptr->queue; - mem = (cl_mem)buffer_t_ptr->buffer_ptr; - - err = clRetainMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - err = clRetainCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - - // Not using any events for the time being. Eventually we want to figure - // out the event dependencies using parfor analysis. 
- err = clEnqueueReadBuffer(queue, mem, blocking?CL_TRUE:CL_FALSE, - offset, buffersize, data_ptr, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Failed to read from CL buffer."); - - err = clReleaseCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to release the command queue."); - err = clReleaseMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to release the buffer memory object."); - -#if DEBUG - printf("DEBUG: CL buffer read from device...\n"); -#endif - fflush(stdout); - //--- TODO: Implement a version that uses clEnqueueMapBuffer - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int create_dp_program_from_spirv (env_t env_t_ptr, - const void *il, - size_t length, - program_t *program_t_ptr) -{ - cl_int err; - cl_context context; - program_t prog; -#if DUMP_SPIRV - FILE *write_file; -#endif -#if DEBUG - check_env_id(env_t_ptr); -#endif - prog = NULL; - -#if DUMP_SPIRV - write_file = fopen("latest.spirv","wb"); - fwrite(il,length,1,write_file); - fclose(write_file); -#endif - - prog = (program_t)malloc(sizeof(struct dp_program)); - CHECK_MALLOC_ERROR(program_t, program_t_ptr); - - prog->id_ = PROGRAM_ID; - - context = (cl_context)env_t_ptr->context; - - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - // Create a program with a SPIR-V file - prog->program = clCreateProgramWithIL(context, il, length, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create program with IL"); -#if DEBUG - printf("DEBUG: CL program created from spirv of length %zu...\n", length); -#endif - - *program_t_ptr = prog; - - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(prog); - return DP_GLUE_FAILURE; -} - - -int create_dp_program_from_source (env_t env_t_ptr, - unsigned int count, - const char **strings, - const size_t *lengths, - program_t *program_t_ptr) -{ - cl_int err; - cl_context context; - program_t prog; -#if DEBUG - check_env_id(env_t_ptr); -#endif - prog = NULL; - prog = (program_t)malloc(sizeof(struct dp_program)); - CHECK_MALLOC_ERROR(program_t, program_t_ptr); - - prog->id_ = PROGRAM_ID; - - context = (cl_context)env_t_ptr->context; - - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - // Create a program with string source files - prog->program = clCreateProgramWithSource(context, count, strings, - lengths, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create program with source"); -#if DEBUG - printf("DEBUG: CL program created from source...\n"); -#endif - - *program_t_ptr = prog; - - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(prog); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_program (program_t *program_ptr) -{ - cl_int err; -#if DEBUG - check_program_id(*program_ptr); -#endif - err = clReleaseProgram((cl_program)(*program_ptr)->program); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL program."); - free(*program_ptr); - -#if DEBUG - printf("DEBUG: CL program destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -int build_dp_program (env_t env_t_ptr, program_t program_t_ptr) -{ - cl_int err; - cl_device_id device; -#if DEBUG - check_env_id(env_t_ptr); - check_program_id(program_t_ptr); -#endif - device = (cl_device_id)env_t_ptr->device; - err = clRetainDevice(device); - 
CHECK_OPEN_CL_ERROR(err, "Could not retain device"); - // Build (compile) the program for the device - err = clBuildProgram((cl_program)program_t_ptr->program, 1, &device, NULL, - NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not build program"); -#if DEBUG - printf("DEBUG: CL program successfully built.\n"); -#endif - err = clReleaseDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not release device"); - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int create_dp_kernel (env_t env_t_ptr, - program_t program_t_ptr, - const char *kernel_name, - kernel_t *kernel_ptr) -{ - cl_int err; - cl_context context; - kernel_t ker; -#if DEBUG - check_env_id(env_t_ptr); -#endif - ker = NULL; - ker = (kernel_t)malloc(sizeof(struct dp_kernel)); - CHECK_MALLOC_ERROR(kernel_t, kernel_ptr); - - ker->id_ = KERNEL_ID; - - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - ker->kernel = clCreateKernel((cl_program)(program_t_ptr->program), - kernel_name, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create kernel"); - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); -#if DEBUG - printf("DEBUG: CL kernel created\n"); -#endif - ker->dump_fn = dump_dp_kernel_info; - *kernel_ptr = ker; - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(ker); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_kernel (kernel_t *kernel_ptr) -{ - cl_int err; -#if DEBUG - check_kernel_id(*kernel_ptr); -#endif - err = clReleaseKernel((cl_kernel)(*kernel_ptr)->kernel); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL kernel."); - free(*kernel_ptr); - -#if DEBUG - printf("DEBUG: CL kernel destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int create_dp_kernel_arg (const void *arg_value, - size_t arg_size, - kernel_arg_t *kernel_arg_t_ptr) -{ - kernel_arg_t kernel_arg; - - kernel_arg = NULL; - kernel_arg = (kernel_arg_t)malloc(sizeof(struct dp_kernel_arg)); - CHECK_MALLOC_ERROR(kernel_arg_t, kernel_arg); - - kernel_arg->id_ = KERNELARG_ID; - kernel_arg->arg_size = arg_size; - kernel_arg->arg_value = arg_value; - -#if DEBUG - printf("DEBUG: Kernel arg created\n"); -// void **tp = (void**)kernel_arg->arg_value; -// printf("DEBUG: create_kernel_arg %p (size %ld, addr %p)\n", -// kernel_arg, kernel_arg->arg_size, *tp); -#endif - - *kernel_arg_t_ptr = kernel_arg; - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -} - -/*! - * - */ -int create_dp_kernel_arg_from_buffer (buffer_t *buffer_t_ptr, - kernel_arg_t *kernel_arg_t_ptr) -{ -#if DEBUG - check_buffer_id(*buffer_t_ptr); -#endif - return create_dp_kernel_arg(&((*buffer_t_ptr)->buffer_ptr), - (*buffer_t_ptr)->sizeof_buffer_ptr, - kernel_arg_t_ptr); -} - -/*! - * - */ -int destroy_dp_kernel_arg (kernel_arg_t *kernel_arg_t_ptr) -{ - free(*kernel_arg_t_ptr); - -#if DEBUG - printf("DEBUG: Kernel arg destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; -} - - -/*! 
- * - */ -int set_args_and_enqueue_dp_kernel (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *array_of_args, - unsigned int work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size) -{ - size_t i; - cl_int err; - cl_kernel kernel; - cl_command_queue queue; - - err = 0; -#if DEBUG - check_env_id(env_t_ptr); - check_kernel_id(kernel_t_ptr); -#endif - kernel = (cl_kernel)kernel_t_ptr->kernel; - queue = (cl_command_queue)env_t_ptr->queue; -#if DEBUG - kernel_t_ptr->dump_fn(kernel_t_ptr); -#endif - // Set the kernel arguments - for(i = 0; i < nargs; ++i) { -#if DEBUG - printf("DEBUG: clSetKernelArgs for arg # %zu\n", i); -#endif - kernel_arg_t this_arg = array_of_args[i]; -#if DEBUG - check_kernelarg_id(this_arg); - void **tp = (void**)this_arg->arg_value; - printf("DEBUG: clSetKernelArgs for arg # %zu (size %zu, addr %p)\n", i, - this_arg->arg_size, *tp); -#endif - err = clSetKernelArg(kernel, i, this_arg->arg_size, - this_arg->arg_value); - CHECK_OPEN_CL_ERROR(err, "Could not set arguments to the kernel"); - } - - // Execute the kernel. Not using events for the time being. - err = clEnqueueNDRangeKernel(queue, kernel, work_dim, global_work_offset, - global_work_size, local_work_size, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not enqueue the kernel"); - - err = clFinish(queue); - CHECK_OPEN_CL_ERROR(err, "Failed while waiting for queue to finish"); -#if DEBUG - printf("DEBUG: CL Kernel Finish...\n"); -#endif - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int set_args_and_enqueue_dp_kernel_auto_blocking (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int num_dims, - size_t *dim_starts, - size_t *dim_stops) -{ - size_t *global_work_size; -// size_t *local_work_size; - int err; - unsigned i; - - global_work_size = (size_t*)malloc(sizeof(size_t) * num_dims); -// local_work_size = (size_t*)malloc(sizeof(size_t) * num_dims); - CHECK_MALLOC_ERROR(size_t, global_work_size); -// CHECK_MALLOC_ERROR(size_t, local_work_size); - - assert(num_dims > 0 && num_dims < 4); - for (i = 0; i < num_dims; ++i) { - global_work_size[i] = dim_stops[i] - dim_starts[i] + 1; - } - - err = set_args_and_enqueue_dp_kernel(env_t_ptr, - kernel_t_ptr, - nargs, - args, - num_dims, - NULL, - global_work_size, - NULL); - free(global_work_size); -// free(local_work_size); - return err; - -malloc_error: - free(global_work_size); -// free(local_work_size); - return DP_GLUE_FAILURE; -} diff --git a/backends/source/dppl_sycl_context_interface.cpp b/backends/source/dppl_sycl_context_interface.cpp index a004d8d1d4..78c71f5ad0 100644 --- a/backends/source/dppl_sycl_context_interface.cpp +++ b/backends/source/dppl_sycl_context_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_context_interface.cpp - DPPL-SYCL interface --*- C++ -*-===// +//===------- dppl_sycl_context_interface.cpp - dpctl-C_API ---*--- C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -55,7 +55,7 @@ void DPPLContext_Delete (__dppl_take DPPLSyclContextRef CtxRef) delete unwrap(CtxRef); } -DPPLSyclBEType +DPPLSyclBackendType DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef) { auto BE = unwrap(CtxRef)->get_platform().get_backend(); diff --git a/backends/source/dppl_sycl_device_interface.cpp b/backends/source/dppl_sycl_device_interface.cpp index 0c3bb77af2..0dbf2affe1 100644 
--- a/backends/source/dppl_sycl_device_interface.cpp
+++ b/backends/source/dppl_sycl_device_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_device_interface.cpp - DPPL-SYCL interface --*- C++ -*--===//
+//===------ dppl_sycl_device_interface.cpp - dpctl-C_API ---*--- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -60,22 +60,22 @@ void dump_device_info (const device & Device)
     switch(devTy)
     {
     case info::device_type::cpu:
-        ss << "cpu" << '\n';
-        break;
+        ss << "cpu" << '\n';
+        break;
     case info::device_type::gpu:
-        ss << "gpu" << '\n';
-        break;
+        ss << "gpu" << '\n';
+        break;
     case info::device_type::accelerator:
-        ss << "accelerator" << '\n';
-        break;
+        ss << "accelerator" << '\n';
+        break;
     case info::device_type::custom:
-        ss << "custom" << '\n';
-        break;
+        ss << "custom" << '\n';
+        break;
     case info::device_type::host:
-        ss << "host" << '\n';
-        break;
+        ss << "host" << '\n';
+        break;
     default:
-        ss << "unknown" << '\n';
+        ss << "unknown" << '\n';
     }

     std::cout << ss.str();
@@ -104,8 +104,8 @@ void DPPLDevice_Delete (__dppl_take DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsAccelerator (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_accelerator();
+    if (D) {
+        return D->is_accelerator();
     }
     return false;
 }
@@ -113,18 +113,17 @@ bool DPPLDevice_IsAccelerator (__dppl_keep const DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsCPU (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_cpu();
+    if (D) {
+        return D->is_cpu();
     }
     return false;
-
 }

 bool DPPLDevice_IsGPU (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_gpu();
+    if (D) {
+        return D->is_gpu();
     }
     return false;
 }
@@ -133,29 +132,28 @@ bool DPPLDevice_IsGPU (__dppl_keep const DPPLSyclDeviceRef DRef)

 bool DPPLDevice_IsHost (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_host();
+    if (D) {
+        return D->is_host();
     }
     return false;
 }

 uint32_t
-DPPLDevice_GetMaxComputeUnites (__dppl_keep const DPPLSyclDeviceRef DRef)
+DPPLDevice_GetMaxComputeUnits (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_compute_units>();
     }
     return 0;
-
 }

 uint32_t
 DPPLDevice_GetMaxWorkItemDims (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_work_item_dimensions>();
     }
     return 0;
@@ -166,7 +164,7 @@ DPPLDevice_GetMaxWorkItemSizes (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     size_t *sizes = nullptr;
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         auto id_sizes = D->get_info<info::device::max_work_item_sizes>();
         sizes = new size_t[3];
         for(auto i = 0ul; i < 3; ++i) {
@@ -180,7 +178,7 @@ size_t
 DPPLDevice_GetMaxWorkGroupSize (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_work_group_size>();
     }
     return 0;
@@ -190,18 +188,38 @@ uint32_t
 DPPLDevice_GetMaxNumSubGroups (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_num_sub_groups>();
     }
     return 0;
 }

+bool
+DPPLDevice_HasInt64BaseAtomics (__dppl_keep const DPPLSyclDeviceRef DRef)
+{
+    auto D = unwrap(DRef);
+    if (D) {
+        return D->has(aspect::int64_base_atomics);
+    }
+    return false;
+}
+
+bool
+DPPLDevice_HasInt64ExtendedAtomics (__dppl_keep const DPPLSyclDeviceRef DRef)
+{
+    auto D = unwrap(DRef);
+    if (D) {
+        return D->has(aspect::int64_extended_atomics);
+    }
+    return false;
+}
+
 __dppl_give const
 char* DPPLDevice_GetName (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto name = unwrap(DRef)->get_info<info::device::name>();
+    if (D) {
+        auto name = D->get_info<info::device::name>();
         auto cstr_name = new char [name.length()+1];
         std::strcpy (cstr_name, name.c_str());
         return cstr_name;
@@ -213,8 +231,8 @@ __dppl_give const
 char* DPPLDevice_GetVendorName (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto vendor = unwrap(DRef)->get_info<info::device::vendor>();
+    if (D) {
+        auto vendor = D->get_info<info::device::vendor>();
         auto cstr_vendor = new char [vendor.length()+1];
         std::strcpy (cstr_vendor, vendor.c_str());
         return cstr_vendor;
@@ -226,8 +244,8 @@ __dppl_give const
 char* DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto driver = unwrap(DRef)->get_info<info::device::driver_version>();
+    if (D) {
+        auto driver = D->get_info<info::device::driver_version>();
         auto cstr_driver = new char [driver.length()+1];
         std::strcpy (cstr_driver, driver.c_str());
         return cstr_driver;
@@ -238,7 +256,7 @@ DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::host_unified_memory>();
     }
     return false;
diff --git a/backends/source/dppl_sycl_event_interface.cpp b/backends/source/dppl_sycl_event_interface.cpp
index 69a739483f..ae8356adba 100644
--- a/backends/source/dppl_sycl_event_interface.cpp
+++ b/backends/source/dppl_sycl_event_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_event_interface.cpp - DPPL-SYCL interface --*- C++ -*---===//
+//===------ dppl_sycl_event_interface.cpp - dpctl-C_API ---*--- C++ --*---===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
diff --git a/backends/source/dppl_sycl_kernel_interface.cpp b/backends/source/dppl_sycl_kernel_interface.cpp
index 56fa9380cb..e030974bc3 100644
--- a/backends/source/dppl_sycl_kernel_interface.cpp
+++ b/backends/source/dppl_sycl_kernel_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_kernel_interface.cpp - DPPL-SYCL interface --*-- C++ -*-===//
+//===------ dppl_sycl_kernel_interface.cpp - dpctl-C_API ---*--- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
diff --git a/backends/source/dppl_sycl_platform_interface.cpp b/backends/source/dppl_sycl_platform_interface.cpp
index 4fff1d0bfd..2aa0af7ed4 100644
--- a/backends/source/dppl_sycl_platform_interface.cpp
+++ b/backends/source/dppl_sycl_platform_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_platform_interface.cpp - DPPL-SYCL interface --*- C++ -*-===//
+//===------ dppl_sycl_platform_interface.cpp - dpctl-C_API --*-- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -36,27 +36,26 @@ using namespace cl::sycl;

 namespace
 {
-std::set<DPPLSyclBEType>
-get_set_of_backends ()
+std::set<DPPLSyclBackendType>
+get_set_of_non_hostbackends ()
 {
-    std::set<DPPLSyclBEType> be_set;
+    std::set<DPPLSyclBackendType> be_set;
     for (auto p : platform::get_platforms()) {
         if(p.is_host())
-            continue;
+            continue;
         auto be = p.get_backend();
         switch (be)
         {
         case backend::host:
-            be_set.insert(DPPLSyclBEType::DPPL_HOST);
             break;
         case backend::cuda:
-            be_set.insert(DPPLSyclBEType::DPPL_CUDA);
+            be_set.insert(DPPLSyclBackendType::DPPL_CUDA);
             break;
         case backend::level_zero:
-            be_set.insert(DPPLSyclBEType::DPPL_LEVEL_ZERO);
+
be_set.insert(DPPLSyclBackendType::DPPL_LEVEL_ZERO); break; case backend::opencl: - be_set.insert(DPPLSyclBEType::DPPL_OPENCL); + be_set.insert(DPPLSyclBackendType::DPPL_OPENCL); break; default: break; @@ -128,22 +127,22 @@ void DPPLPlatform_DumpInfo () switch (devTy) { case info::device_type::cpu: - ss << "cpu" << '\n'; - break; + ss << "cpu" << '\n'; + break; case info::device_type::gpu: - ss << "gpu" << '\n'; - break; + ss << "gpu" << '\n'; + break; case info::device_type::accelerator: - ss << "accelerator" << '\n'; - break; + ss << "accelerator" << '\n'; + break; case info::device_type::custom: - ss << "custom" << '\n'; - break; + ss << "custom" << '\n'; + break; case info::device_type::host: - ss << "host" << '\n'; - break; + ss << "host" << '\n'; + break; default: - ss << "unknown" << '\n'; + ss << "unknown" << '\n'; } } std::cout << ss.str(); @@ -154,24 +153,30 @@ void DPPLPlatform_DumpInfo () /*! * Returns the number of sycl::platform on the system. */ -size_t DPPLPlatform_GetNumPlatforms () +size_t DPPLPlatform_GetNumNonHostPlatforms () { - return platform::get_platforms().size(); + auto nNonHostPlatforms = 0ul; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + ++nNonHostPlatforms; + } + return nNonHostPlatforms; } -size_t DPPLPlatform_GetNumBackends () +size_t DPPLPlatform_GetNumNonHostBackends () { - return get_set_of_backends().size(); + return get_set_of_non_hostbackends().size(); } -__dppl_give enum DPPLSyclBEType *DPPLPlatform_GetListOfBackends () +__dppl_give DPPLSyclBackendType *DPPLPlatform_GetListOfNonHostBackends () { - auto be_set = get_set_of_backends(); + auto be_set = get_set_of_non_hostbackends(); if (be_set.empty()) return nullptr; - DPPLSyclBEType *BEArr = new DPPLSyclBEType[be_set.size()]; + DPPLSyclBackendType *BEArr = new DPPLSyclBackendType[be_set.size()]; auto i = 0ul; for (auto be : be_set) { @@ -182,7 +187,7 @@ __dppl_give enum DPPLSyclBEType *DPPLPlatform_GetListOfBackends () return BEArr; } -void DPPLPlatform_DeleteListOfBackends (__dppl_take enum DPPLSyclBEType *BEArr) +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType *BEArr) { delete[] BEArr; } diff --git a/backends/source/dppl_sycl_program_interface.cpp b/backends/source/dppl_sycl_program_interface.cpp index 01d615a08b..edb8429c3f 100644 --- a/backends/source/dppl_sycl_program_interface.cpp +++ b/backends/source/dppl_sycl_program_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_program_interface.cpp - DPPL-SYCL interface --*-- C++ -*-===// +//===----- dppl_sycl_program_interface.cpp - dpctl-C_API ---*--- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/source/dppl_sycl_queue_interface.cpp b/backends/source/dppl_sycl_queue_interface.cpp index 2121a2292c..0231df8cf8 100644 --- a/backends/source/dppl_sycl_queue_interface.cpp +++ b/backends/source/dppl_sycl_queue_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_interface.cpp - DPPL-SYCL interface --*- C++ -*---===// +//===------ dppl_sycl_queue_interface.cpp - dpctl-C_API ---*--- C++ --*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -27,6 +27,8 @@ #include "dppl_sycl_queue_interface.h" #include "dppl_sycl_context_interface.h" #include "Support/CBindingWrapping.h" +#include +#include #include /* SYCL headers */ @@ -72,6 +74,9 @@ bool set_kernel_arg (handler &cgh, 
size_t idx, __dppl_keep void *Arg,
     case DPPL_UNSIGNED_INT:
         cgh.set_arg(idx, *(unsigned int*)Arg);
         break;
+    case DPPL_UNSIGNED_INT8:
+        cgh.set_arg(idx, *(uint8_t*)Arg);
+        break;
     case DPPL_LONG:
         cgh.set_arg(idx, *(long*)Arg);
         break;
@@ -128,11 +133,18 @@ bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1,
     return (*unwrap(QRef1) == *unwrap(QRef2));
 }

-enum DPPLSyclBEType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef)
+DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef)
 {
     auto Q = unwrap(QRef);
-    auto C = Q->get_context();
-    return DPPLContext_GetBackend(wrap(&C));
+    try {
+        auto C = Q->get_context();
+        return DPPLContext_GetBackend(wrap(&C));
+    }
+    catch (runtime_error &re) {
+        std::cerr << re.what() << '\n';
+        // store error message
+        return DPPL_UNKNOWN_BACKEND;
+    }
 }

 __dppl_give DPPLSyclDeviceRef
@@ -197,11 +209,11 @@ DPPLQueue_SubmitRange (__dppl_keep const DPPLSyclKernelRef KRef,
                                          "dimensions.");
             }
         });
-    } catch (runtime_error re) {
+    } catch (runtime_error &re) {
         // \todo fix error handling
         std::cerr << re.what() << '\n';
         return nullptr;
-    } catch (std::runtime_error sre) {
+    } catch (std::runtime_error &sre) {
         std::cerr << sre.what() << '\n';
         return nullptr;
     }
@@ -258,11 +270,11 @@ DPPLQueue_SubmitNDRange(__dppl_keep const DPPLSyclKernelRef KRef,
                                          "dimensions.");
             }
         });
-    } catch (runtime_error re) {
+    } catch (runtime_error &re) {
         // \todo fix error handling
         std::cerr << re.what() << '\n';
         return nullptr;
-    } catch (std::runtime_error sre) {
+    } catch (std::runtime_error &sre) {
         std::cerr << sre.what() << '\n';
         return nullptr;
     }
diff --git a/backends/source/dppl_sycl_queue_manager.cpp b/backends/source/dppl_sycl_queue_manager.cpp
index ffe5abcff0..f708b4aea2 100644
--- a/backends/source/dppl_sycl_queue_manager.cpp
+++ b/backends/source/dppl_sycl_queue_manager.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_queue_manager.cpp - DPPL-SYCL interface --*- C++ -*---===//
+//===--------- dppl_sycl_queue_manager.cpp - dpctl-C_API --*-- C++ ---*---===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -25,7 +25,6 @@
 //===----------------------------------------------------------------------===//
 #include "dppl_sycl_queue_manager.h"
 #include "Support/CBindingWrapping.h"
-#include
 #include
 #include

@@ -54,7 +53,8 @@ class QMgrHelper
 public:
     using QVec = vector_class<queue>;

-    static QVec* init_queues (backend BE, info::device_type DTy) {
+    static QVec* init_queues (backend BE, info::device_type DTy)
+    {
         QVec *queues = new QVec();
         auto Platforms = platform::get_platforms();
         for (auto &p : Platforms) {
@@ -65,26 +65,72 @@ class QMgrHelper
             if (Devices.size() == 1) {
                 auto d = Devices[0];
                 auto devty = d.get_info<info::device::device_type>();
-                auto Ctx = context(d);
                 if(devty == DTy && be == BE) {
-                    queues->emplace_back(Ctx, d);
+                    auto Ctx = context(d);
+                    queues->emplace_back(Ctx, d);
                     break;
                 }
             }
             else {
-                auto Ctx = context(Devices);
+                vector_class<device> SelectedDevices;
                 for(auto &d : Devices) {
                     auto devty = d.get_info<info::device::device_type>();
                     if(devty == DTy && be == BE) {
-                        queues->emplace_back(Ctx, d);
+                        SelectedDevices.push_back(d);
+
+                        // Workaround: in some environments get_devices()
+                        // returns each device TWICE, and the context
+                        // constructor then fails when given the duplicated
+                        // device list. So use only the first matching device.
break; } } + if (SelectedDevices.size() > 0) { + auto Ctx = context(SelectedDevices); + auto d = SelectedDevices[0]; + queues->emplace_back(Ctx, d); + } } } return queues; } + static QVec* init_active_queues () + { + QVec *active_queues; + try { + auto def_device = std::move(default_selector().select_device()); + auto BE = def_device.get_platform().get_backend(); + auto DevTy = def_device.get_info(); + + // \todo : We need to have a better way to match the default device + // to what SYCL returns based on the same scoring logic. Just + // storing the first device is not correct when we will have + // multiple devices of same type. + if(BE == backend::opencl && + DevTy == info::device_type::cpu) { + active_queues = new QVec({get_opencl_cpu_queues()[0]}); + } + else if(BE == backend::opencl && + DevTy == info::device_type::gpu) { + active_queues = new QVec({get_opencl_gpu_queues()[0]}); + } + else if(BE == backend::level_zero && + DevTy == info::device_type::gpu) { + active_queues = new QVec({get_level0_gpu_queues()[0]}); + } + else { + active_queues = new QVec(); + } + } + catch (runtime_error &re) { + // \todo Handle the error + active_queues = new QVec(); + } + + return active_queues; + } + static QVec& get_opencl_cpu_queues () { static QVec* queues = init_queues(backend::opencl, @@ -108,14 +154,13 @@ class QMgrHelper static QVec& get_active_queues () { - thread_local static QVec* active_queues = - new QVec({default_selector()}); + thread_local static QVec *active_queues = init_active_queues(); return *active_queues; } static __dppl_give DPPLSyclQueueRef - getQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static __dppl_give DPPLSyclQueueRef @@ -124,13 +169,13 @@ class QMgrHelper static bool isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); static __dppl_give DPPLSyclQueueRef - setAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static __dppl_give DPPLSyclQueueRef - pushSyclQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static void @@ -146,7 +191,7 @@ class QMgrHelper */ DPPLSyclQueueRef QMgrHelper::getCurrentQueue () { - auto activated_q = get_active_queues(); + auto &activated_q = get_active_queues(); if(activated_q.empty()) { // \todo handle error std::cerr << "No currently active queues.\n"; @@ -163,15 +208,15 @@ DPPLSyclQueueRef QMgrHelper::getCurrentQueue () * be used for that purpose. 
*/ __dppl_give DPPLSyclQueueRef -QMgrHelper::getQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { auto cpuQs = get_opencl_cpu_queues(); if (DNum >= cpuQs.size()) { @@ -183,7 +228,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, QRef = new queue(cpuQs[DNum]); break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { auto gpuQs = get_opencl_gpu_queues(); if (DNum >= gpuQs.size()) { @@ -195,7 +240,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, QRef = new queue(gpuQs[DNum]); break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { auto l0GpuQs = get_level0_gpu_queues(); if (DNum >= l0GpuQs.size()) { @@ -222,7 +267,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, */ bool QMgrHelper::isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) { - auto activated_q = get_active_queues(); + auto &activated_q = get_active_queues(); if(activated_q.empty()) { // \todo handle error std::cerr << "No currently active queues.\n"; @@ -238,8 +283,8 @@ bool QMgrHelper::isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) * sycl::queue corresponding to the device type and device number. */ __dppl_give DPPLSyclQueueRef -QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; @@ -251,7 +296,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { auto oclcpu_q = get_opencl_cpu_queues(); if (DNum >= oclcpu_q.size()) { @@ -263,7 +308,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, activeQ[0] = oclcpu_q[DNum]; break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { auto oclgpu_q = get_opencl_gpu_queues(); if (DNum >= oclgpu_q.size()) { @@ -275,7 +320,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, activeQ[0] = oclgpu_q[DNum]; break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { auto l0gpu_q = get_level0_gpu_queues(); if (DNum >= l0gpu_q.size()) { @@ -305,8 +350,8 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, * purpose. 
*/ __dppl_give DPPLSyclQueueRef -QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; @@ -318,7 +363,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { if (DNum >= get_opencl_cpu_queues().size()) { // \todo handle error @@ -330,7 +375,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, QRef = new queue(activeQ[activeQ.size()-1]); break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { if (DNum >= get_opencl_gpu_queues().size()) { // \todo handle error @@ -342,7 +387,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, QRef = new queue(activeQ[get_active_queues().size()-1]); break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { if (DNum >= get_level0_gpu_queues().size()) { // \todo handle error @@ -405,20 +450,20 @@ size_t DPPLQueueMgr_GetNumActivatedQueues () * Returns the number of available queues for a specific backend and device * type combination. */ -size_t DPPLQueueMgr_GetNumQueues (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy) +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy) { switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { return QMgrHelper::get_opencl_cpu_queues().size(); } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { return QMgrHelper::get_opencl_gpu_queues().size(); } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { return QMgrHelper::get_level0_gpu_queues().size(); } @@ -443,8 +488,8 @@ DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () * Returns a copy of a sycl::queue corresponding to the specified device type * and device number. A runtime_error gets thrown if no such device exists. */ -DPPLSyclQueueRef DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::getQueue(BETy, DeviceTy, DNum); @@ -464,8 +509,8 @@ bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) * device, Null is returned. 
*/ __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::setAsDefaultQueue(BETy, DeviceTy, DNum); @@ -475,8 +520,8 @@ DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, * \see QMgrHelper::pushSyclQueue() */ __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::pushSyclQueue(BETy, DeviceTy, DNum); diff --git a/backends/source/dppl_sycl_usm_interface.cpp b/backends/source/dppl_sycl_usm_interface.cpp index 54ff95efc6..959398f462 100644 --- a/backends/source/dppl_sycl_usm_interface.cpp +++ b/backends/source/dppl_sycl_usm_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_usm_interface.cpp - DPPL-SYCL interface --*- C++ -*---===// +//===------- dppl_sycl_usm_interface.cpp - dpctl-C_API ---*--- C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -73,10 +73,10 @@ void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef, } void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef, - __dppl_keep const DPPLSyclContextRef СRef) + __dppl_keep const DPPLSyclContextRef CRef) { auto Ptr = unwrap(MRef); - auto C = unwrap(СRef); + auto C = unwrap(CRef); free(Ptr, *C); } diff --git a/backends/source/dppl_utils.cpp b/backends/source/dppl_utils.cpp index f18bd94f78..6468809070 100644 --- a/backends/source/dppl_utils.cpp +++ b/backends/source/dppl_utils.cpp @@ -1,6 +1,6 @@ -//===--------- dppl_utils.cpp - DPPL-SYCL interface ----*---- C++ ----*----===// +//===-------------- dppl_utils.cpp - dpctl-C_API ----*---- C++ -----*-----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -33,4 +33,4 @@ void DPPLCString_Delete (__dppl_take const char* str) void DPPLSize_t_Array_Delete (__dppl_take size_t* arr) { delete[] arr; -} \ No newline at end of file +} diff --git a/backends/tests/CMakeLists.txt b/backends/tests/CMakeLists.txt index 86233ef7d5..d0efc5ebfe 100644 --- a/backends/tests/CMakeLists.txt +++ b/backends/tests/CMakeLists.txt @@ -3,9 +3,9 @@ string(COMPARE EQUAL "${GTEST_LIB_DIR}" "" no_gtest_lib_dir) if(${no_gtest_incl_dir} OR ${no_gtest_lib_dir}) message(WARNING - "GTest is needed to test PyDPPL's backend test cases. Pass in \ + "GTest is needed to test dpCtl's C API test cases. Pass in \ -DGTEST_INCLUDE_DIR and -DGTEST_LIB_DIR when you configure Cmake if\ - you wish to run PyDPPL backend tests." + you wish to run dpCtl backend tests." 
) else() # We need thread support for gtest @@ -22,7 +22,7 @@ else() link_directories(${GTEST_LIB_DIR}) - set(PYDPPL_BACKEND_TEST_CASES + set(DPCTL_C_API_TEST_CASES test_sycl_device_interface test_sycl_kernel_interface test_sycl_platform_interface @@ -39,7 +39,7 @@ else() file(COPY ${tf} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) endforeach() - foreach(TEST_CASE ${PYDPPL_BACKEND_TEST_CASES}) + foreach(TEST_CASE ${DPCTL_C_API_TEST_CASES}) add_executable(${TEST_CASE} EXCLUDE_FROM_ALL ${TEST_CASE}.cpp) target_link_libraries( ${TEST_CASE} ${CMAKE_THREAD_LIBS_INIT} gtest DPPLSyclInterface diff --git a/backends/tests/test_sycl_device_interface.cpp b/backends/tests/test_sycl_device_interface.cpp index 4f209d5b4f..43da260e39 100644 --- a/backends/tests/test_sycl_device_interface.cpp +++ b/backends/tests/test_sycl_device_interface.cpp @@ -1,4 +1,4 @@ -//===----- test_sycl_device_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===----- test_sycl_device_interface.cpp - dpctl-C_API interface -*- C++ -*-===// // // Python Data Parallel Processing Library (PyDPPL) // @@ -20,7 +20,7 @@ /// /// \file /// This file has unit test cases for functions defined in -/// dppl_sycl_kernel_interface.h. +/// dppl_sycl_device_interface.h. /// //===----------------------------------------------------------------------===// @@ -39,6 +39,7 @@ struct TestDPPLSyclDeviceInterface : public ::testing::Test { DPPLSyclDeviceRef OpenCL_cpu = nullptr; DPPLSyclDeviceRef OpenCL_gpu = nullptr; + DPPLSyclDeviceRef OpenCL_Level0_gpu = nullptr; TestDPPLSyclDeviceInterface () { @@ -53,12 +54,19 @@ struct TestDPPLSyclDeviceInterface : public ::testing::Test OpenCL_gpu = DPPLQueue_GetDevice(Q); DPPLQueue_Delete(Q); } + + if(DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU)) { + auto Q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + OpenCL_Level0_gpu = DPPLQueue_GetDevice(Q); + DPPLQueue_Delete(Q); + } } ~TestDPPLSyclDeviceInterface () { DPPLDevice_Delete(OpenCL_cpu); DPPLDevice_Delete(OpenCL_gpu); + DPPLDevice_Delete(OpenCL_Level0_gpu); } }; @@ -83,22 +91,41 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetDriverInfo) DPPLCString_Delete(DriverInfo); } -TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxComputeUnites) +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetDriverInfo) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto DriverInfo = DPPLDevice_GetDriverInfo(OpenCL_Level0_gpu); + EXPECT_TRUE(DriverInfo != nullptr); + DPPLCString_Delete(DriverInfo); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxComputeUnits) { if(!OpenCL_cpu) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); - auto n = DPPLDevice_GetMaxComputeUnites(OpenCL_cpu); - EXPECT_TRUE(n != 0); + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_cpu); + EXPECT_TRUE(n > 0); } -TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxComputeUnites) +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxComputeUnits) { if(!OpenCL_gpu) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); - auto n = DPPLDevice_GetMaxComputeUnites(OpenCL_gpu); - EXPECT_TRUE(n != 0); + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_gpu); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxComputeUnits) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemDims) @@ -107,7 +134,7 @@ TEST_F 
(TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemDims) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemDims) @@ -116,26 +143,55 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemDims) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkItemDims) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemSizes) { if(!OpenCL_cpu) - GTEST_SKIP_("Skipping as no OpenCL GPU device found."); + GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_cpu); EXPECT_TRUE(item_sizes != nullptr); DPPLSize_t_Array_Delete(item_sizes); } +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemSizes) +{ + if(!OpenCL_gpu) + GTEST_SKIP_("Skipping as no OpenCL GPU device found."); + + auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_gpu); + EXPECT_TRUE(item_sizes != nullptr); + DPPLSize_t_Array_Delete(item_sizes); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkItemSizes) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_Level0_gpu); + EXPECT_TRUE(item_sizes != nullptr); + DPPLSize_t_Array_Delete(item_sizes); +} + TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkGroupSize) { if(!OpenCL_cpu) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkGroupSize) @@ -144,7 +200,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkGroupSize) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkGroupSize) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxNumSubGroups) @@ -153,7 +218,7 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxNumSubGroups) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxNumSubGroups) @@ -162,7 +227,88 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxNumSubGroups) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxNumSubGroups) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); +} + +//TODO: Update when DPC++ properly supports aspects +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_HasInt64BaseAtomics) +{ + 
if(!OpenCL_cpu)
+        GTEST_SKIP_("Skipping as no OpenCL CPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_cpu);
+    auto D = reinterpret_cast<device*>(OpenCL_cpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_HasInt64BaseAtomics)
+{
+    if(!OpenCL_gpu)
+        GTEST_SKIP_("Skipping as no OpenCL GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_gpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_HasInt64BaseAtomics)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_Level0_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_Level0_gpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_cpu)
+        GTEST_SKIP_("Skipping as no OpenCL CPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_cpu);
+    auto D = reinterpret_cast<device*>(OpenCL_cpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_gpu)
+        GTEST_SKIP_("Skipping as no OpenCL GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_gpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_Level0_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_Level0_gpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
 }

 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetName)
@@ -185,6 +331,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetName)
     DPPLCString_Delete(DevName);
 }

+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetName)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto DevName = DPPLDevice_GetName(OpenCL_Level0_gpu);
+    EXPECT_TRUE(DevName != nullptr);
+    DPPLCString_Delete(DevName);
+}
+
 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetVendorName)
 {
     if(!OpenCL_cpu)
@@ -205,6 +361,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetVendorName)
     DPPLCString_Delete(VendorName);
 }

+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetVendorName)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto VendorName = DPPLDevice_GetVendorName(OpenCL_Level0_gpu);
+    EXPECT_TRUE(VendorName != nullptr);
+    DPPLCString_Delete(VendorName);
+}
+
 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_IsCPU)
 {
     if(!OpenCL_cpu)
@@ -221,10 +387,18 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_IsGPU)
     EXPECT_TRUE(DPPLDevice_IsGPU(OpenCL_gpu));
 }

+TEST_F
(TestDPPLSyclDeviceInterface, CheckLevel0GPU_IsGPU) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + EXPECT_TRUE(DPPLDevice_IsGPU(OpenCL_Level0_gpu)); +} + int main (int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); int ret = RUN_ALL_TESTS(); return ret; -} \ No newline at end of file +} diff --git a/backends/tests/test_sycl_kernel_interface.cpp b/backends/tests/test_sycl_kernel_interface.cpp index b07592c202..8ef4d8d951 100644 --- a/backends/tests/test_sycl_kernel_interface.cpp +++ b/backends/tests/test_sycl_kernel_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_program_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===-------- test_sycl_program_interface.cpp - dpctl-C_API -*- C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/tests/test_sycl_platform_interface.cpp b/backends/tests/test_sycl_platform_interface.cpp index 06891cf6f0..fed815e9bf 100644 --- a/backends/tests/test_sycl_platform_interface.cpp +++ b/backends/tests/test_sycl_platform_interface.cpp @@ -1,6 +1,6 @@ -//===--- test_sycl_platform_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===------- test_sycl_platform_interface.cpp - dpctl-C_API --*-- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -31,26 +31,31 @@ struct TestDPPLSyclPlatformInterface : public ::testing::Test TEST_F (TestDPPLSyclPlatformInterface, CheckGetNumPlatforms) { - auto nplatforms = DPPLPlatform_GetNumPlatforms(); + auto nplatforms = DPPLPlatform_GetNumNonHostPlatforms(); EXPECT_GE(nplatforms, 0); } TEST_F (TestDPPLSyclPlatformInterface, GetNumBackends) { - auto nbackends = DPPLPlatform_GetNumBackends(); + auto nbackends = DPPLPlatform_GetNumNonHostBackends(); EXPECT_GE(nbackends, 0); } TEST_F (TestDPPLSyclPlatformInterface, GetListOfBackends) { - auto nbackends = DPPLPlatform_GetNumBackends(); - auto backends = DPPLPlatform_GetListOfBackends(); - EXPECT_TRUE(backends != nullptr); + auto nbackends = DPPLPlatform_GetNumNonHostBackends(); + + if(!nbackends) + GTEST_SKIP_("No non host backends available"); + + auto backends = DPPLPlatform_GetListOfNonHostBackends(); + EXPECT_TRUE(backends != nullptr); for(auto i = 0ul; i < nbackends; ++i) { EXPECT_TRUE( - backends[i] == DPPLSyclBEType::DPPL_CUDA || - backends[i] == DPPLSyclBEType::DPPL_OPENCL || - backends[i] == DPPLSyclBEType::DPPL_LEVEL_ZERO); + backends[i] == DPPLSyclBackendType::DPPL_CUDA || + backends[i] == DPPLSyclBackendType::DPPL_OPENCL || + backends[i] == DPPLSyclBackendType::DPPL_LEVEL_ZERO + ); } DPPLPlatform_DeleteListOfBackends(backends); } diff --git a/backends/tests/test_sycl_program_interface.cpp b/backends/tests/test_sycl_program_interface.cpp index 027a81f6c3..b32f7b7ab5 100644 --- a/backends/tests/test_sycl_program_interface.cpp +++ b/backends/tests/test_sycl_program_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_program_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===---------- test_sycl_program_interface.cpp - dpctl-C_API --*-- C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/tests/test_sycl_queue_interface.cpp b/backends/tests/test_sycl_queue_interface.cpp index 0a1711e622..2d68bdcdec 100644 --- a/backends/tests/test_sycl_queue_interface.cpp 
+++ b/backends/tests/test_sycl_queue_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_queue_interface.cpp - DPPL-SYCL interface -*- C++ --*--===// +//===-------- test_sycl_queue_interface.cpp - dpctl-C_API ---*--- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -31,32 +31,48 @@ #include "dppl_sycl_queue_interface.h" #include "dppl_sycl_queue_manager.h" #include "dppl_sycl_usm_interface.h" - #include "Support/CBindingWrapping.h" - +#include #include +using namespace cl::sycl; + namespace { - constexpr size_t SIZE = 1024; +constexpr size_t SIZE = 1024; - DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef); +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef); - void add_kernel_checker (const float *a, const float *b, const float *c) - { - // Validate the data - for(auto i = 0ul; i < SIZE; ++i) { - EXPECT_EQ(c[i], a[i] + b[i]); - } +void add_kernel_checker (const float *a, const float *b, const float *c) +{ + // Validate the data + for(auto i = 0ul; i < SIZE; ++i) { + EXPECT_EQ(c[i], a[i] + b[i]); } +} - void axpy_kernel_checker (const float *a, const float *b, const float *c, - float d) - { - for(auto i = 0ul; i < SIZE; ++i) { - EXPECT_EQ(c[i], a[i] + d*b[i]); +void axpy_kernel_checker (const float *a, const float *b, const float *c, + float d) +{ + for(auto i = 0ul; i < SIZE; ++i) { + EXPECT_EQ(c[i], a[i] + d*b[i]); + } +} + +bool has_devices () +{ + bool ret = false; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + if(!p.get_devices().empty()) { + ret = true; + break; } } + return ret; +} + } struct TestDPPLSyclQueueInterface : public ::testing::Test @@ -89,30 +105,30 @@ struct TestDPPLSyclQueueInterface : public ::testing::Test TEST_F (TestDPPLSyclQueueInterface, CheckAreEq) { - auto Q1 = DPPLQueueMgr_GetCurrentQueue(); - auto Q2 = DPPLQueueMgr_GetCurrentQueue(); - EXPECT_TRUE(DPPLQueue_AreEq(Q1, Q2)); + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); - auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBEType::DPPL_OPENCL, + auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU); - auto nOclCPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBEType::DPPL_OPENCL, - DPPLSyclDeviceType::DPPL_CPU); - { if(!nOclGPU) - GTEST_SKIP_("No OpenCL GPUs available.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPUs available.\n"); + + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto Q2 = DPPLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPPLQueue_AreEq(Q1, Q2)); auto Def_Q = DPPLQueueMgr_SetAsDefaultQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto OclGPU_Q0 = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto OclGPU_Q1 = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); @@ -124,30 +140,39 @@ TEST_F (TestDPPLSyclQueueInterface, CheckAreEq) DPPLQueue_Delete(OclGPU_Q1); DPPLQueueMgr_PopQueue(); DPPLQueueMgr_PopQueue(); - } +} + +TEST_F (TestDPPLSyclQueueInterface, CheckAreEq2) +{ + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); - { + auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU); + auto nOclCPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_CPU); if(!nOclGPU || !nOclCPU) 
GTEST_SKIP_("OpenCL GPUs and CPU not available.\n"); auto GPU_Q = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto CPU_Q = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_CPU, 0 ); EXPECT_FALSE(DPPLQueue_AreEq(GPU_Q, CPU_Q)); DPPLQueueMgr_PopQueue(); DPPLQueueMgr_PopQueue(); - } - } TEST_F (TestDPPLSyclQueueInterface, CheckGetBackend) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto BE = DPPLQueue_GetBackend(Q1); EXPECT_TRUE((BE == DPPL_OPENCL) || @@ -178,6 +203,9 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetBackend) TEST_F (TestDPPLSyclQueueInterface, CheckGetContext) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto Ctx = DPPLQueue_GetContext(Q1); ASSERT_TRUE(Ctx != nullptr); @@ -212,6 +240,9 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetContext) TEST_F (TestDPPLSyclQueueInterface, CheckGetDevice) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto D = DPPLQueue_GetDevice(Q1); ASSERT_TRUE(D != nullptr); @@ -249,10 +280,13 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetDevice) TEST_F (TestDPPLSyclQueueInterface, CheckSubmit) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); if(!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device.\n"); auto Queue = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); auto CtxRef = DPPLQueue_GetContext(Queue); diff --git a/backends/tests/test_sycl_queue_manager.cpp b/backends/tests/test_sycl_queue_manager.cpp index 675fc01c4c..55f8cb725d 100644 --- a/backends/tests/test_sycl_queue_manager.cpp +++ b/backends/tests/test_sycl_queue_manager.cpp @@ -1,6 +1,6 @@ -//===--- test_sycl_queue_manager.cpp - DPPL-SYCL interface --*- C++ ---*---===// +//===------- test_sycl_queue_manager.cpp - dpctl-C_API ---*--- C++ ----*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -37,26 +37,41 @@ using namespace cl::sycl; namespace { - void foo (size_t & num) - { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); - auto q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - // Capture the number of active queues in first - num = DPPLQueueMgr_GetNumActivatedQueues(); - DPPLQueueMgr_PopQueue(); - DPPLQueueMgr_PopQueue(); - DPPLQueue_Delete(q1); - DPPLQueue_Delete(q2); - } +void foo (size_t & num) +{ + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + auto q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + // Capture the number of active queues in first + num = DPPLQueueMgr_GetNumActivatedQueues(); + DPPLQueueMgr_PopQueue(); + DPPLQueueMgr_PopQueue(); + DPPLQueue_Delete(q1); + DPPLQueue_Delete(q2); +} - void bar (size_t & num) - { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - // Capture the number of active queues in second - num = DPPLQueueMgr_GetNumActivatedQueues(); - DPPLQueueMgr_PopQueue(); - DPPLQueue_Delete(q1); +void bar (size_t & num) +{ + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + // Capture the number of active queues in second + num = DPPLQueueMgr_GetNumActivatedQueues(); + DPPLQueueMgr_PopQueue(); + 
DPPLQueue_Delete(q1); +} + +bool has_devices () +{ + bool ret = false; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + if(!p.get_devices().empty()) { + ret = true; + break; + } } + return ret; +} + } struct TestDPPLSyclQueueManager : public ::testing::Test @@ -65,6 +80,9 @@ struct TestDPPLSyclQueueManager : public ::testing::Test TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetCurrentQueue) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + DPPLSyclQueueRef q; ASSERT_NO_THROW(q = DPPLQueueMgr_GetCurrentQueue()); ASSERT_TRUE(q != nullptr); @@ -73,9 +91,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetCurrentQueue) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLCpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); if(!nOpenCLCpuQ) - GTEST_SKIP_("Skipping as no OpenCL CPU device found."); + GTEST_SKIP_("Skipping: No OpenCL CPU device found."); auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_CPU, 0); EXPECT_TRUE(q != nullptr); @@ -94,9 +115,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLCpuQ) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLGpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); if(!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); EXPECT_TRUE(q != nullptr); @@ -115,9 +139,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLGpuQ) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetLevel0GpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nL0GpuQ = DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU); if(!nL0GpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); auto q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); EXPECT_TRUE(q != nullptr); @@ -136,6 +163,9 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetLevel0GpuQ) TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + size_t num0, num1, num2, num4; auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); @@ -172,6 +202,8 @@ TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); auto q = DPPLQueueMgr_GetCurrentQueue(); EXPECT_NO_FATAL_FAILURE(DPPLDevice_DumpInfo(DPPLQueue_GetDevice(q))); EXPECT_NO_FATAL_FAILURE(DPPLQueue_Delete(q)); @@ -179,20 +211,39 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); if(!DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) - GTEST_SKIP_("No OpenCL GPU.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU.\n"); auto Q0 = DPPLQueueMgr_GetCurrentQueue(); EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); - auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q)); - EXPECT_FALSE(DPPLQueueMgr_IsCurrentQueue(Q0)); - DPPLQueue_Delete(Q); + auto Q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q1); DPPLQueueMgr_PopQueue(); 
EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); DPPLQueue_Delete(Q0); } +TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue2) +{ + if(!DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU) || + !DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) + GTEST_SKIP_("Skipping: No OpenCL GPU and OpenCL CPU.\n"); + + auto Q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + auto Q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q2)); + EXPECT_FALSE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q2); + DPPLQueueMgr_PopQueue(); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q1); + DPPLQueueMgr_PopQueue(); +} + int main (int argc, char** argv) { diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index a951f8a1f6..1d811447a8 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -3,7 +3,7 @@ IF ERRORLEVEL 1 exit 1 REM conda uses %ERRORLEVEL% but FPGA scripts can set it. So it should be reseted. set ERRORLEVEL= -set "CC=dpcpp.exe" +set "CC=clang-cl.exe" set "CXX=dpcpp.exe" rmdir /S /Q build_cmake @@ -17,7 +17,7 @@ rmdir /S /Q "%INSTALL_PREFIX%" cmake -G Ninja ^ -DCMAKE_BUILD_TYPE=Release ^ - "-DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX%" ^ + "-DCMAKE_INSTALL_PREFIX=%INSTALL_PREFIX%" ^ "-DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX%" ^ "-DDPCPP_ROOT=%DPCPP_ROOT%" ^ "%SRC_DIR%/backends" @@ -29,17 +29,12 @@ IF %ERRORLEVEL% NEQ 0 exit 1 cd .. xcopy install\lib\*.lib dpctl /E /Y -xcopy install\lib\*.dll dpctl /E /Y +xcopy install\bin\*.dll dpctl /E /Y mkdir dpctl\include xcopy backends\include dpctl\include /E /Y -REM required by _opencl_core (dpctl.ocldrv) -set "DPPL_OPENCL_INTERFACE_LIBDIR=dpctl" -set "DPPL_OPENCL_INTERFACE_INCLDIR=dpctl\include" -set "OpenCL_LIBDIR=%DPCPP_ROOT%\lib" - REM required by _sycl_core(dpctl) set "DPPL_SYCL_INTERFACE_LIBDIR=dpctl" set "DPPL_SYCL_INTERFACE_INCLDIR=dpctl\include" diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 72c28d711d..607da35268 100755 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -40,10 +40,6 @@ cp install/lib/*.so dpctl/ mkdir -p dpctl/include cp -r backends/include/* dpctl/include -# required by dpctl.opencl_core -export DPPL_OPENCL_INTERFACE_LIBDIR=dpctl -export DPPL_OPENCL_INTERFACE_INCLDIR=dpctl/include -export OpenCL_LIBDIR=${DPCPP_ROOT}/lib # required by dpctl.sycl_core export DPPL_SYCL_INTERFACE_LIBDIR=dpctl @@ -53,6 +49,5 @@ export DPPL_SYCL_INTERFACE_INCLDIR=dpctl/include # FIXME: How to pass this using setup.py? This flags is needed when # dpcpp compiles the generated cpp file. export CFLAGS="-fPIC -O3 ${CFLAGS}" -export LDFLAGS="-L ${OpenCL_LIBDIR} ${LDFLAGS}" ${PYTHON} setup.py clean --all ${PYTHON} setup.py build install diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 48ff634421..acf485898a 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -17,7 +17,6 @@ requirements: - {{ compiler('cxx') }} host: - setuptools - - cffi >=1.0.0 - cython - cmake - python @@ -27,10 +26,9 @@ requirements: run: - python - numpy >=1.17 - - cffi >=1.0.0 about: - home: https://github.com/IntelPython/PyDPPL.git + home: https://github.com/IntelPython/dpCtl.git license: Apache-2.0 license_file: LICENSE summary: 'A lightweight Python wrapper for a subset of OpenCL and SYCL API.' 
diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat index 992277b497..ed3a395aec 100644 --- a/conda-recipe/run_test.bat +++ b/conda-recipe/run_test.bat @@ -8,8 +8,5 @@ set ERRORLEVEL= "%PYTHON%" -c "import dpctl" IF %ERRORLEVEL% NEQ 0 exit 1 -"%PYTHON%" -c "import dpctl.ocldrv" -IF %ERRORLEVEL% NEQ 0 exit 1 - "%PYTHON%" -m unittest -v dpctl.tests IF %ERRORLEVEL% NEQ 0 exit 1 diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 775783ce50..ff46be6632 100644 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -6,5 +6,4 @@ set -e source ${ONEAPI_ROOT}/compiler/latest/env/vars.sh || true ${PYTHON} -c "import dpctl" -${PYTHON} -c "import dpctl.ocldrv" ${PYTHON} -m unittest -v dpctl.tests diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 333867f89c..af9aa93076 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -22,7 +22,7 @@ ## This top-level dpctl module. ## ##===----------------------------------------------------------------------===## -''' +""" Data Parallel Control (dpCtl) dpCtl provides a lightweight Python abstraction over DPC++/SYCL and @@ -33,7 +33,7 @@ dpCtl's intended usage is as a common SYCL interoperability layer for different Python libraries and applications. The OpenCL support inside - PyDPPL is slated to be deprecated and then removed in future releases + dpCtl is slated to be deprecated and then removed in future releases of the library. Currently, only a small subset of DPC++ runtime objects are exposed @@ -43,12 +43,13 @@ Please use `pydoc dpctl.ocldrv` to look at the current API for dpctl.ocldrv. -''' +""" __author__ = "Intel Corp." from ._sycl_core import * from ._version import get_versions + def get_include(): """ Return the directory that contains the dpCtl *.h header files. @@ -57,7 +58,9 @@ def get_include(): this function to locate the appropriate include directory. 
""" import os.path - return os.path.join(os.path.dirname(__file__), 'include') -__version__ = get_versions()['version'] + return os.path.join(os.path.dirname(__file__), "include") + + +__version__ = get_versions()["version"] del get_versions diff --git a/dpctl/backend.pxd b/dpctl/_backend.pxd similarity index 82% rename from dpctl/backend.pxd rename to dpctl/_backend.pxd index c7cecae87a..584ae79fd8 100644 --- a/dpctl/backend.pxd +++ b/dpctl/_backend.pxd @@ -28,20 +28,22 @@ # cython: language_level=3 from libcpp cimport bool +from libc.stdint cimport uint32_t cdef extern from "dppl_utils.h": cdef void DPPLCString_Delete (const char *str) + cdef void DPPLSize_t_Array_Delete (size_t *arr) cdef extern from "dppl_sycl_enum_types.h": - cdef enum _backend_type 'DPPLSyclBEType': + cdef enum _backend_type 'DPPLSyclBackendType': _OPENCL 'DPPL_OPENCL' _HOST 'DPPL_HOST' _LEVEL_ZERO 'DPPL_LEVEL_ZERO' _CUDA 'DPPL_CUDA' _UNKNOWN_BACKEND 'DPPL_UNKNOWN_BACKEND' - ctypedef _backend_type DPPLSyclBEType + ctypedef _backend_type DPPLSyclBackendType cdef enum _device_type 'DPPLSyclDeviceType': _GPU 'DPPL_GPU' @@ -57,7 +59,8 @@ cdef extern from "dppl_sycl_enum_types.h": _UNSIGNED_CHAR 'DPPL_UNSIGNED_CHAR', _SHORT 'DPPL_SHORT', _INT 'DPPL_INT', - _UNSIGNED_INT 'DPPL_INT', + _UNSIGNED_INT 'DPPL_UNSIGNED_INT', + _UNSIGNED_INT8 'DPPL_UNSIGNED_INT8', _LONG 'DPPL_LONG', _UNSIGNED_LONG 'DPPL_UNSIGNED_LONG', _LONG_LONG 'DPPL_LONG_LONG', @@ -96,10 +99,17 @@ cdef extern from "dppl_sycl_device_interface.h": cdef bool DPPLDevice_IsCPU (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsGPU (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsHost (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsHostUnifiedMemory (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxComputeUnits (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxWorkItemDims (const DPPLSyclDeviceRef DRef) + cpdef size_t *DPPLDevice_GetMaxWorkItemSizes (const DPPLSyclDeviceRef DRef) + cpdef size_t DPPLDevice_GetMaxWorkGroupSize (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxNumSubGroups (const DPPLSyclDeviceRef DRef) + cpdef bool DPPLDevice_HasInt64BaseAtomics (const DPPLSyclDeviceRef DRef) + cpdef bool DPPLDevice_HasInt64ExtendedAtomics (const DPPLSyclDeviceRef DRef) cdef extern from "dppl_sycl_event_interface.h": @@ -114,17 +124,18 @@ cdef extern from "dppl_sycl_kernel_interface.h": cdef extern from "dppl_sycl_platform_interface.h": - cdef size_t DPPLPlatform_GetNumPlatforms () + cdef size_t DPPLPlatform_GetNumNonHostPlatforms () cdef void DPPLPlatform_DumpInfo () - cdef size_t DPPLPlatform_GetNumBackends () - cdef DPPLSyclBEType *DPPLPlatform_GetListOfBackends () - cdef void DPPLPlatform_DeleteListOfBackends (DPPLSyclBEType * BEs) + cdef size_t DPPLPlatform_GetNumNonHostBackends () + cdef DPPLSyclBackendType *DPPLPlatform_GetListOfNonHostBackends () + cdef void DPPLPlatform_DeleteListOfBackends (DPPLSyclBackendType * BEs) cdef extern from "dppl_sycl_context_interface.h": cdef bool DPPLContext_AreEq (const DPPLSyclContextRef CtxRef1, const DPPLSyclContextRef CtxRef2) - cdef 
DPPLSyclBEType DPPLContext_GetBackend (const DPPLSyclContextRef CtxRef) + cdef DPPLSyclBackendType DPPLContext_GetBackend ( + const DPPLSyclContextRef CtxRef) cdef void DPPLContext_Delete (DPPLSyclContextRef CtxRef) @@ -148,7 +159,7 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef bool DPPLQueue_AreEq (const DPPLSyclQueueRef QRef1, const DPPLSyclQueueRef QRef2) cdef void DPPLQueue_Delete (DPPLSyclQueueRef QRef) - cdef DPPLSyclBEType DPPLQueue_GetBackend (const DPPLSyclQueueRef Q) + cdef DPPLSyclBackendType DPPLQueue_GetBackend (const DPPLSyclQueueRef Q) cdef DPPLSyclContextRef DPPLQueue_GetContext (const DPPLSyclQueueRef Q) cdef DPPLSyclDeviceRef DPPLQueue_GetDevice (const DPPLSyclQueueRef Q) cdef DPPLSyclEventRef DPPLQueue_SubmitRange ( @@ -179,19 +190,19 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef extern from "dppl_sycl_queue_manager.h": cdef DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () - cdef size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBEType BETy, + cdef size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy) cdef size_t DPPLQueueMgr_GetNumActivatedQueues () - cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBEType BETy, + cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum) cdef bool DPPLQueueMgr_IsCurrentQueue (const DPPLSyclQueueRef QRef) cdef void DPPLQueueMgr_PopQueue () - cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (DPPLSyclBEType BETy, + cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum) cdef DPPLSyclQueueRef DPPLQueueMgr_SetAsDefaultQueue ( - DPPLSyclBEType BETy, + DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum ) diff --git a/dpctl/_memory.pxd b/dpctl/_memory.pxd index 3c868125ca..2ab5066c8d 100644 --- a/dpctl/_memory.pxd +++ b/dpctl/_memory.pxd @@ -21,7 +21,7 @@ # distutils: language = c++ # cython: language_level=3 -from .backend cimport DPPLSyclUSMRef +from ._backend cimport DPPLSyclUSMRef from ._sycl_core cimport SyclQueue diff --git a/dpctl/_memory.pyx b/dpctl/_memory.pyx index 1671d3bb7c..96259b0451 100644 --- a/dpctl/_memory.pyx +++ b/dpctl/_memory.pyx @@ -29,7 +29,7 @@ # cython: language_level=3 import dpctl -from dpctl.backend cimport * +from dpctl._backend cimport * from ._sycl_core cimport SyclContext, SyclQueue from cpython cimport Py_buffer diff --git a/dpctl/_sycl_core.pxd b/dpctl/_sycl_core.pxd index a95e5f28c5..89a74dca57 100644 --- a/dpctl/_sycl_core.pxd +++ b/dpctl/_sycl_core.pxd @@ -27,7 +27,8 @@ # distutils: language = c++ # cython: language_level=3 -from .backend cimport * +from ._backend cimport * +from libc.stdint cimport uint32_t cdef class SyclContext: @@ -48,10 +49,28 @@ cdef class SyclDevice: cdef const char *_vendor_name cdef const char *_device_name cdef const char *_driver_version + cdef uint32_t _max_compute_units + cdef uint32_t _max_work_item_dims + cdef size_t *_max_work_item_sizes + cdef size_t _max_work_group_size + cdef uint32_t _max_num_sub_groups + cdef bool _int64_base_atomics + cdef bool _int64_extended_atomics @staticmethod cdef SyclDevice _create (DPPLSyclDeviceRef dref) cdef DPPLSyclDeviceRef get_device_ref (self) + cpdef get_device_name (self) + cpdef get_device_type (self) + cpdef get_vendor_name (self) + cpdef get_driver_version (self) + cpdef get_max_compute_units (self) + cpdef get_max_work_item_dims (self) + cpdef get_max_work_item_sizes (self) + cpdef get_max_work_group_size (self) + cpdef get_max_num_sub_groups (self) + cpdef 
has_int64_base_atomics (self) + cpdef has_int64_extended_atomics (self) cdef class SyclEvent: @@ -118,3 +137,6 @@ cdef class SyclQueue: cpdef void wait (self) cdef DPPLSyclQueueRef get_queue_ref (self) cpdef memcpy (self, dest, src, int count) + + +cpdef SyclQueue get_current_queue() diff --git a/dpctl/sycl_core.pyx b/dpctl/_sycl_core.pyx similarity index 82% rename from dpctl/sycl_core.pyx rename to dpctl/_sycl_core.pyx index ea1916fc46..129728894a 100644 --- a/dpctl/sycl_core.pyx +++ b/dpctl/_sycl_core.pyx @@ -29,7 +29,7 @@ from __future__ import print_function from enum import Enum, auto import logging -from .backend cimport * +from ._backend cimport * from ._memory cimport Memory from libc.stdlib cimport malloc, free @@ -105,6 +105,15 @@ cdef class SyclContext: cdef DPPLSyclContextRef get_context_ref (self): return self._ctxt_ref + def addressof_ref (self): + """Returns the address of the DPPLSyclContextRef pointer as a + long. + + Returns: + The address of the DPPLSyclContextRef object used to create this + SyclContext cast to a long. + """ + return int(self._ctx_ref) cdef class SyclDevice: ''' Wrapper class for a Sycl Device @@ -117,6 +126,13 @@ cdef class SyclDevice: ret._vendor_name = DPPLDevice_GetVendorName(dref) ret._device_name = DPPLDevice_GetName(dref) ret._driver_version = DPPLDevice_GetDriverInfo(dref) + ret._max_compute_units = DPPLDevice_GetMaxComputeUnits(dref) + ret._max_work_item_dims = DPPLDevice_GetMaxWorkItemDims(dref) + ret._max_work_item_sizes = DPPLDevice_GetMaxWorkItemSizes(dref) + ret._max_work_group_size = DPPLDevice_GetMaxWorkGroupSize(dref) + ret._max_num_sub_groups = DPPLDevice_GetMaxNumSubGroups(dref) + ret._int64_base_atomics = DPPLDevice_HasInt64BaseAtomics(dref) + ret._int64_extended_atomics = DPPLDevice_HasInt64ExtendedAtomics(dref) return ret def __dealloc__ (self): @@ -124,18 +140,19 @@ cdef class SyclDevice: DPPLCString_Delete(self._device_name) DPPLCString_Delete(self._vendor_name) DPPLCString_Delete(self._driver_version) + DPPLSize_t_Array_Delete(self._max_work_item_sizes) def dump_device_info (self): ''' Print information about the SYCL device. ''' DPPLDevice_DumpInfo(self._device_ref) - def get_device_name (self): + cpdef get_device_name (self): ''' Returns the name of the device as a string ''' return self._device_name.decode() - def get_device_type (self): + cpdef get_device_type (self): ''' Returns the type of the device as a `device_type` enum ''' if DPPLDevice_IsGPU(self._device_ref): @@ -145,12 +162,12 @@ cdef class SyclDevice: else: raise ValueError("Unknown device type.") - def get_vendor_name (self): + cpdef get_vendor_name (self): ''' Returns the device vendor name as a string ''' return self._vendor_name.decode() - def get_driver_version (self): + cpdef get_driver_version (self): ''' Returns the OpenCL software driver version as a string in the form: major number.minor number, if this SYCL device is an OpenCL device. Returns a string class @@ -158,11 +175,72 @@ cdef class SyclDevice: ''' return self._driver_version.decode() + cpdef has_int64_base_atomics (self): + ''' Returns true if device has int64_base_atomics else returns false. + ''' + return self._int64_base_atomics + + cpdef has_int64_extended_atomics (self): + ''' Returns true if device has int64_extended_atomics else returns false. + ''' + return self._int64_extended_atomics + + cpdef get_max_compute_units (self): + ''' Returns the number of parallel compute units + available to the device. The minimum value is 1. 
+ ''' + return self._max_compute_units + + cpdef get_max_work_item_dims (self): + ''' Returns the maximum dimensions that specify + the global and local work-item IDs used by the + data parallel execution model. The minimum + value is 3 if this SYCL device is not of device + type info::device_type::custom. + ''' + return self._max_work_item_dims + + cpdef get_max_work_item_sizes (self): + ''' Returns the maximum number of work-items + that are permitted in each dimension of the + work-group of the nd_range. The minimum + value is (1; 1; 1) for devices that are not of + device type info::device_type::custom. + ''' + max_work_item_sizes = [] + for n in range(3): + max_work_item_sizes.append(self._max_work_item_sizes[n]) + return tuple(max_work_item_sizes) + + cpdef get_max_work_group_size (self): + ''' Returns the maximum number of work-items + that are permitted in a work-group executing a + kernel on a single compute unit. The minimum + value is 1. + ''' + return self._max_work_group_size + + cpdef get_max_num_sub_groups (self): + ''' Returns the maximum number of sub-groups + in a work-group for any kernel executed on the + device. The minimum value is 1. + ''' + return self._max_num_sub_groups + cdef DPPLSyclDeviceRef get_device_ref (self): ''' Returns the DPPLSyclDeviceRef pointer for this class. ''' return self._device_ref + def addressof_ref (self): + """Returns the address of the DPPLSyclDeviceRef pointer as a + long. + + Returns: + The address of the DPPLSyclDeviceRef object used to create this + SyclDevice cast to a long. + """ + return int(self._device_ref) cdef class SyclEvent: ''' Wrapper class for a Sycl Event @@ -187,6 +265,16 @@ cdef class SyclEvent: cpdef void wait (self): DPPLEvent_Wait(self._event_ref) + def addressof_ref (self): + """Returns the address of the C API DPPLSyclEventRef pointer as + a long. + + Returns: + The address of the DPPLSyclEventRef object used to create this + SyclEvent cast to a long. + """ + return int(self._event_ref) + cdef class SyclKernel: ''' Wraps a sycl::kernel object created from an OpenCL interoperability @@ -219,6 +307,15 @@ cdef class SyclKernel: ''' return self._kernel_ref + def addressof_ref (self): + """Returns the address of the C API DPPLSyclKernelRef pointer + as a long. + + Returns: + The address of the DPPLSyclKernelRef object used to create this + SyclKernel cast to a long. + """ + return int(self._kernel_ref) cdef class SyclProgram: ''' Wraps a sycl::program object created from an OpenCL interoperability @@ -250,6 +347,16 @@ cdef class SyclProgram: name = kernel_name.encode('utf8') return DPPLProgram_HasKernel(self._program_ref, name) + def addressof_ref (self): + """Returns the address of the C API DPPLSyclProgramRef pointer + as a long. + + Returns: + The address of the DPPLSyclProgramRef object used to create this + SyclProgram cast to a long. 
+ """ + return int(self._program_ref) + import ctypes cdef class SyclQueue: @@ -258,6 +365,8 @@ cdef class SyclQueue: @staticmethod cdef SyclQueue _create (DPPLSyclQueueRef qref): + if qref is NULL: + raise SyclQueueCreationError("Queue creation failed.") cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) ret._context = SyclContext._create(DPPLQueue_GetContext(qref)) ret._device = SyclDevice._create(DPPLQueue_GetDevice(qref)) @@ -294,9 +403,15 @@ cdef class SyclQueue: elif isinstance(arg, ctypes.c_uint): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._UNSIGNED_INT + elif isinstance(arg, ctypes.c_uint8): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_INT8 elif isinstance(arg, ctypes.c_long): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG + elif isinstance(arg, ctypes.c_ulong): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_LONG elif isinstance(arg, ctypes.c_longlong): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG_LONG @@ -353,7 +468,7 @@ cdef class SyclQueue: def get_sycl_backend (self): """ Returns the Sycl bakend associated with the queue. """ - cdef DPPLSyclBEType BE = DPPLQueue_GetBackend(self._queue_ref) + cdef DPPLSyclBackendType BE = DPPLQueue_GetBackend(self._queue_ref) if BE == _backend_type._OPENCL: return backend_type.opencl elif BE == _backend_type._LEVEL_ZERO: @@ -374,6 +489,16 @@ cdef class SyclQueue: cdef DPPLSyclQueueRef get_queue_ref (self): return self._queue_ref + def addressof_ref (self): + """Returns the address of the C API DPPLSyclQueueRef pointer as + a long. + + Returns: + The address of the DPPLSyclQueueRef object used to create this + SyclQueue cast to a long. + """ + return int(self._queue_ref) + cpdef SyclEvent submit (self, SyclKernel kernel, list args, list gS, \ list lS = None, list dEvents = None): @@ -523,15 +648,6 @@ cdef class _SyclRTManager: device_type.gpu : _device_type._GPU, } - cdef _raise_queue_creation_error (self, str be, str dev, int devid, fname): - e = SyclQueueCreationError( - "Queue creation failed for :", be, dev, devid - ) - e.fname = fname - e.code = -1 - raise e - - def _set_as_current_queue (self, backend_ty, device_ty, device_id): cdef DPPLSyclQueueRef queue_ref @@ -540,11 +656,6 @@ cdef class _SyclRTManager: try : devTy = self._device_str_ty_dict[device_ty] queue_ref = DPPLQueueMgr_PushQueue(beTy, devTy, device_id) - if queue_ref is NULL: - self._raise_queue_creation_error( - backend_ty, device_ty, device_id, - "DPPLQueueMgr_PushQueue" - ) return SyclQueue._create(queue_ref) except KeyError: raise UnsupportedDeviceError("Device can only be gpu or cpu") @@ -563,7 +674,7 @@ cdef class _SyclRTManager: def print_available_backends (self): """ Prints the available backends. """ - print(self._backend_ty_dict.keys()) + print(self._backend_str_ty_dict.keys()) def get_current_backend (self): """ Returns the backend for the current queue as `backend_type` enum @@ -575,7 +686,7 @@ cdef class _SyclRTManager: ''' return self.get_current_queue().get_sycl_device().get_device_type() - def get_current_queue (self): + cpdef SyclQueue get_current_queue (self): ''' Returns the activated SYCL queue as a PyCapsule. ''' return SyclQueue._create(DPPLQueueMgr_GetCurrentQueue()) @@ -586,9 +697,9 @@ cdef class _SyclRTManager: return DPPLQueueMgr_GetNumActivatedQueues() def get_num_platforms (self): - ''' Returns the number of available SYCL/OpenCL platforms. + ''' Returns the number of available non-host SYCL platforms. 
''' - return DPPLPlatform_GetNumPlatforms() + return DPPLPlatform_GetNumNonHostPlatforms() def get_num_queues (self, backend_ty, device_ty): cdef size_t num = 0 @@ -640,7 +751,7 @@ cdef class _SyclRTManager: return False def has_sycl_platforms (self): - cdef size_t num_platforms = DPPLPlatform_GetNumPlatforms() + cdef size_t num_platforms = DPPLPlatform_GetNumNonHostPlatforms() if num_platforms: return True else: @@ -656,9 +767,15 @@ cdef class _SyclRTManager: def set_default_queue (self, backend_ty, device_ty, device_id): cdef DPPLSyclQueueRef ret try : - beTy = self._backend_ty_dict[backend_ty] + if isinstance(backend_ty, str): + beTy = self._backend_str_ty_dict[backend_ty] + else: + beTy = self._backend_enum_ty_dict[backend_ty] try : - devTy = self._device_ty_dict[device_ty] + if isinstance(device_ty, str): + devTy = self._device_str_ty_dict[device_ty] + else: + devTyp = self._device_enum_ty_dist[device_ty] ret = DPPLQueueMgr_SetAsDefaultQueue(beTy, devTy, device_id) if ret is NULL: self._raise_queue_creation_error( @@ -679,7 +796,6 @@ _mgr = _SyclRTManager() # Global bound functions dump = _mgr.dump -get_current_queue = _mgr.get_current_queue get_current_device_type = _mgr.get_current_device_type get_num_platforms = _mgr.get_num_platforms get_num_activated_queues = _mgr.get_num_activated_queues @@ -690,6 +806,10 @@ has_sycl_platforms = _mgr.has_sycl_platforms set_default_queue = _mgr.set_default_queue is_in_device_context = _mgr.is_in_device_context +cpdef SyclQueue get_current_queue(): + ''' Obtain current Sycl Queue from Data Parallel Control package ''' + return _mgr.get_current_queue() + def create_program_from_source (SyclQueue q, unicode source, unicode copts=""): ''' Creates a Sycl interoperability program from an OpenCL source string. @@ -776,6 +896,7 @@ def device_context (str queue_str="opencl:gpu:0"): # calling get_current_context, or use the returned context object directly. # If set_context is unable to create a new context an exception is raised. + ctxt = None try: attrs = queue_str.split(':') nattrs = len(attrs) @@ -786,7 +907,6 @@ def device_context (str queue_str="opencl:gpu:0"): "device_number defaults to 0") if nattrs == 2: attrs.append("0") - ctxt = None ctxt = _mgr._set_as_current_queue(attrs[0], attrs[1], int(attrs[2])) yield ctxt finally: diff --git a/dpctl/_version.py b/dpctl/_version.py index 39363ca3aa..395c950102 100644 --- a/dpctl/_version.py +++ b/dpctl/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build @@ -58,17 +57,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -76,10 +76,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -116,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -181,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +199,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -198,19 +207,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -225,8 +241,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -234,10 +249,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -260,17 +284,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -279,10 +302,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -293,13 +318,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -330,8 +355,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -445,11 +469,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -469,9 +495,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -485,8 +515,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -495,13 +524,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -515,6 +547,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/dpctl/ocldrv.py b/dpctl/ocldrv.py deleted file mode 100644 index 87af22cbd7..0000000000 --- a/dpctl/ocldrv.py +++ /dev/null @@ -1,709 +0,0 @@ -##===------------- ocldrv.py - dpctl.ocldrv module ------*- Python -*------===## -## -## Data Parallel Control (dpCtl) -## -## Copyright 2020 Intel Corporation -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## -##===----------------------------------------------------------------------===## -### -### \file -### This file exposes Python classes for different OpenCL classes that are -### exposed by the _opencl_core CFFI extension module. -##===----------------------------------------------------------------------===## -''' The dpctl.ocldrv module contains a set of Python wrapper classes for - OpenCL objects. The module has wrappers for cl_context, cl_device, - cl_mem, cl_program, and cl_kernel objects. - - The two main user-visible API classes are Runtime, DeviceArray, and - DeviceEnv and Runtime. The other classes are only used by the Numba - JIT compiler. - - Global data members: - runtime - An instance of the Runtime class. - has_cpu_device - A flag set to True when an OpenCL CPU device is found - on the system. - has_cpu_device - A flag set to True when an OpenCL GPU device is found - on the system. - -''' - -from __future__ import absolute_import, division, print_function - -from contextlib import contextmanager -import ctypes -import logging - -from numpy import ndarray - -from ._opencl_core import ffi, lib - - -__author__ = "Intel Corp." - -_logger = logging.getLogger(__name__) - -# create console handler and set level to debug -_ch = logging.StreamHandler() -_ch.setLevel(logging.WARNING) -# create formatter -_formatter = logging.Formatter('DPPL-%(levelname)s - %(message)s') -# add formatter to ch -_ch.setFormatter(_formatter) -# add ch to logger -_logger.addHandler(_ch) - - -########################################################################## -# Exception classes -########################################################################## - - -class DpplDriverError(Exception): - """ The exception is raised when dpctl.ocldrv cannot find an OpenCL Driver. 
- """ - pass - - -class DeviceNotFoundError(Exception): - """ The exception is raised when the requested type of OpenCL device is - not available or not supported by dpctl.ocldrv. - """ - pass - - -class UnsupportedTypeError(Exception): - """ The exception is raised when an unsupported type is encountered when - creating an OpenCL KernelArg. Only DeviceArray or numpy.ndarray types - are supported. - """ - pass - - -########################################################################## -# Helper functions -########################################################################## - - -def _raise_driver_error(fname, errcode): - e = DpplDriverError("Could not find an OpenCL Driver. Ensure OpenCL \ - driver is installed.") - e.fname = fname - e.code = errcode - raise e - - -def _raise_device_not_found_error(fname): - e = DeviceNotFoundError("OpenCL device not available on the system.") - e.fname = fname - raise e - - -def _raise_unsupported_type_error(fname): - e = UnsupportedTypeError("Type needs to be DeviceArray or numpy.ndarray.") - e.fname = fname - raise e - - -def _raise_unsupported_kernel_arg_error(fname): - e = (UnsupportedTypeError("Type needs to be DeviceArray or a supported " - "ctypes type.")) - e.fname = fname - raise e - - -def _is_supported_ctypes_raw_obj(obj): - return isinstance(obj, (ctypes.c_ssize_t, - ctypes.c_double, - ctypes.c_float, - ctypes.c_uint8, - ctypes.c_size_t)) - -########################################################################## -# DeviceArray class -########################################################################## - - -class DeviceArray: - ''' A Python wrapper for an OpenCL cl_men buffer with read-write access. A - DeviceArray can only be created from a NumPy ndarray. - ''' - _buffObj = None - _ndarray = None - _buffSize = None - _dataPtr = None - - def __init__(self, env_ptr, arr): - ''' Creates a new DeviceArray from an ndarray. - - Note that DeviceArray creation only allocates the cl_mem buffer - and does not actually move the data to the device. Data copy from - host to device is done when the DeviceArray instance is passed as - an argument to DeviceEnv.copy_array_to_device(). - ''' - - # We only support device buffers for ndarray and ctypes (for basic - # types like int, etc) - if not isinstance(arr, ndarray): - _raise_unsupported_type_error("DeviceArray constructor") - - # create a dp_buffer_t object - self._buffObj = ffi.new("buffer_t *") - self._ndarray = arr - self._buffSize = arr.itemsize * arr.size - self._dataPtr = ffi.cast("void *", arr.ctypes.data) - retval = (lib.create_dp_rw_mem_buffer(env_ptr, - self._buffSize, - self._buffObj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_rw_mem_buffer", -1) - - def __del__(self): - ''' Destroy the DeviceArray and release the OpenCL buffer.''' - - retval = (lib.destroy_dp_rw_mem_buffer(self._buffObj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_rw_mem_buffer", -1) - - def get_buffer_obj(self): - ''' Returns a cdata wrapper object encapsulating an OpenCL buffer. - ''' - - return self._buffObj - - def get_buffer_size(self): - ''' Returns the size of the OpenCL buffer in bytes. - ''' - - return self._buffSize - - def get_buffer_ptr(self): - ''' Returns a cdata wrapper over the actual OpenCL cl_mem pointer. 
- ''' - - return self.get_buffer_obj()[0].buffer_ptr - - def get_data_ptr(self): - ''' Returns the data pointer for the NumPy ndarray used to create - the DeviceArray object. - ''' - - return self._dataPtr - - def get_ndarray(self): - ''' Returns the NumPy ndarray used to create the DeviceArray object. - ''' - - return self._ndarray - -########################################################################## -# Program class -########################################################################## - - -class Program(): - - def __init__(self, device_env, spirv_module): - self._prog_t_obj = ffi.new("program_t *") - retval = (lib.create_dp_program_from_spirv(device_env.get_env_ptr(), - spirv_module, - len(spirv_module), - self._prog_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error( - "create_dp_program_from_spirv", -1) - - retval = (lib.build_dp_program(device_env.get_env_ptr(), - self._prog_t_obj[0])) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("build_dp_program", -1) - - def __del__(self): - retval = (lib.destroy_dp_program(self._prog_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_program", -1) - - def get_prog_t_obj(self): - return self._prog_t_obj[0] - - -########################################################################## -# Kernel class -########################################################################## - - -class Kernel(): - - def __init__(self, device_env, prog_t_obj, kernel_name): - self._kernel_t_obj = ffi.new("kernel_t *") - retval = (lib.create_dp_kernel(device_env.get_env_ptr(), - prog_t_obj.get_prog_t_obj(), - kernel_name.encode(), - self._kernel_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_kernel", -1) - - def __del__(self): - retval = (lib.destroy_dp_kernel(self._kernel_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_kernel", -1) - - def get_kernel_t_obj(self): - return self._kernel_t_obj[0] - - def dump(self): - retval = self._kernel_t_obj.dump_fn(self._kernel_t_obj) - if retval == -1: - _raise_driver_error("kernel dump_fn", -1) - -########################################################################## -# KernelArg class -########################################################################## - - -class KernelArg(): - - def __init__(self, arg, void_p_arg=False): - self.arg = arg - self.kernel_arg_t = ffi.new("kernel_arg_t *") - if void_p_arg is True: - self.ptr_to_arg_p = ffi.new("void **") - self.ptr_to_arg_p[0] = ffi.cast("void *", 0) - retval = (lib.create_dp_kernel_arg(self.ptr_to_arg_p, - ffi.sizeof(self.ptr_to_arg_p), - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - if isinstance(arg, DeviceArray): - self.ptr_to_arg_p = ffi.new("void **") - self.ptr_to_arg_p[0] = arg.get_buffer_obj()[0].buffer_ptr - retval = (lib.create_dp_kernel_arg( - self.ptr_to_arg_p, - arg.get_buffer_obj()[0].sizeof_buffer_ptr, - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - # it has to be of type ctypes - if getattr(arg, '__module__', None) == "ctypes": - self.ptr_to_arg_p = ffi.cast("void *", - ctypes.addressof(arg)) - retval = (lib.create_dp_kernel_arg(self.ptr_to_arg_p, - ctypes.sizeof(arg), - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - 
_logger.warning("Unsupported Type %s", type(arg)) - _raise_unsupported_kernel_arg_error("KernelArg init") - - def __del__(self): - retval = (lib.destroy_dp_kernel_arg(self.kernel_arg_t)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_kernel_arg", -1) - - def get_kernel_arg_obj(self): - return self.kernel_arg_t[0] - - -########################################################################## -# DeviceEnv class -########################################################################## - - -class DeviceEnv(): - ''' A Python wrapper over an OpenCL cl_context object. - ''' - - def __init__(self, env_t_obj): - self._env_ptr = env_t_obj - - def __del__(self): - pass - - def retain_context(self): - ''' Increment the reference count of the OpenCL context object. - ''' - - retval = (lib.retain_dp_context(self._env_ptr.context)) - if(retval == -1): - _raise_driver_error("retain_dp_context", -1) - - return (self._env_ptr.context) - - def release_context(self): - ''' Increment the reference count of the OpenCL context object. - ''' - - retval = (lib.release_dp_context(self._env_ptr.context)) - if retval == -1: - _raise_driver_error("release_dp_context", -1) - - def copy_array_to_device(self, array): - ''' Accepts either a DeviceArray or a NumPy ndarray and copies the - data from host to an OpenCL device buffer. Returns either the - DeviceArray that was passed in as an argument, or for the case of - ndarrays returns a new DeviceArray. - - If the function is called with a DeviceArray argument, the - function performs a blocking write of the data from the - DeviceArray's ndarray member into its OpenCL device buffer member. - When the function is called with an ndarray argument is, a new - DeviceArray is first created. The data copy operation is then - performed on the new DeviceArray. - ''' - - if isinstance(array, DeviceArray): - retval = (lib.write_dp_mem_buffer_to_device( - self._env_ptr, - array.get_buffer_obj()[0], - True, - 0, - array.get_buffer_size(), - array.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("write_dp_mem_buffer_to_device", -1) - return array - elif (isinstance(array, ndarray) or getattr(array, '__module__', None) - == "ctypes"): - dArr = DeviceArray(self._env_ptr, array) - retval = (lib.write_dp_mem_buffer_to_device( - self._env_ptr, - dArr.get_buffer_obj()[0], - True, - 0, - dArr.get_buffer_size(), - dArr.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("write_dp_mem_buffer_to_device", -1) - return dArr - else: - _raise_unsupported_type_error("copy_array_to_device") - - def copy_array_from_device(self, array): - ''' Copies data from a cl_mem buffer into a DeviceArray's host memory - pointer. The function argument should be a DeviceArray object. - ''' - - if not isinstance(array, DeviceArray): - _raise_unsupported_type_error("copy_array_to_device") - retval = (lib.read_dp_mem_buffer_from_device( - self._env_ptr, - array.get_buffer_obj()[0], - True, - 0, - array.get_buffer_size(), - array.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("read_dp_mem_buffer_from_device", -1) - - def create_device_array(self, array): - ''' Returns an new DeviceArray instance. 
- ''' - - if not ((isinstance(array, ndarray) or - getattr(array, '__module__', None) - == "ctypes")): - _raise_unsupported_type_error("alloc_array_in_device") - - return DeviceArray(self._env_ptr, array) - - def device_support_int64_atomics(self): - ''' Returns True current device supports 64-bit int atomic operations - ''' - - return self._env_ptr.support_int64_atomics - - def device_support_float64_atomics(self): - ''' Returns True if current device supports 64-bit float atomic operations - ''' - - return self._env_ptr.support_float64_atomics - - def get_context_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_context object. - ''' - - return self._env_ptr.context - - def get_device_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_device object. - ''' - - return self._env_ptr.device - - def get_queue_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_command_queue object. - ''' - - return self._env_ptr.queue - - def get_env_ptr(self): - ''' Returns a cdata wrapper for a C object encapsulating an OpenCL - cl_device object, a cl_command_queue object, - and a cl_context object. - ''' - - return self._env_ptr - - def get_max_work_item_dims(self): - ''' Returns the maximum number of work items per work group for - the OpenCL device. - ''' - - return self._env_ptr.max_work_item_dims - - def get_max_work_group_size(self): - ''' Returns the max work group size for the OpenCL device. - ''' - - return self._env_ptr.max_work_group_size - - def dump(self): - ''' Prints metadata for the underlying OpenCL device. - ''' - - retval = self._env_ptr[0].dump_fn(self._env_ptr) - if retval == -1: - _raise_driver_error("env dump_fn", -1) - return retval - -########################################################################## -# Runtime class -########################################################################## - - -class Runtime(): - '''Runtime is a singleton class that creates a C wrapper object storing - available OpenCL contexts and corresponding OpenCL command queues. The - context and the queue are stored only for the first available GPU and CPU - OpenCL devices found on the system. 
- ''' - - _singleton = None - - def __new__(cls): - obj = cls._singleton - if obj is not None: - return obj - else: - obj = object.__new__(cls) - - cls._lib = lib - cls._ffi = ffi - - ffiobj = ffi.new("runtime_t *") - retval = (lib.create_dp_runtime(ffiobj)) - if(retval): - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_runtime", -1) - - cls._runtime = ffiobj - - if cls._runtime[0][0].has_cpu: - cls._cpu_device = DeviceEnv(cls._runtime[0][0].first_cpu_env) - else: - cls._cpu_device = None - _logger.warning("No CPU device") - - if cls._runtime[0][0].has_gpu: - cls._gpu_device = DeviceEnv(cls._runtime[0][0].first_gpu_env) - else: - cls._gpu_device = None - _logger.warning("No GPU device") - - cls._curr_device = DeviceEnv(cls._runtime[0][0].curr_env) - cls._singleton = obj - - return obj - - def __init__(self): - pass - - def __del__(self): - if self._runtime: - retval = (self._lib.destroy_dp_runtime(self._runtime)) - if(retval): - _raise_driver_error("destroy_dp_runtime", -1) - - def has_cpu_device(self): - ''' Returns True is the system has an OpenCL driver for the CPU.''' - - return self._cpu_device is not None - - def has_gpu_device(self): - ''' Returns True is the system has an OpenCL driver for the GPU.''' - - return self._gpu_device is not None - - - def get_cpu_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - CPU context. - ''' - - if(self._cpu_device is None): - _raise_device_not_found_error("get_cpu_device") - - return self._cpu_device - - def get_gpu_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - GPU context. - ''' - - if(self._gpu_device is None): - _raise_device_not_found_error("get_gpu_device") - - return self._gpu_device - - def get_current_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - CPU context. - ''' - - return self._curr_device - - def get_runtime_ptr(self): - ''' Returns a reference to the runtime object. - ''' - - return self._runtime[0] - - def dump(self): - ''' Prints OpenCL metadata about the available devices and contexts. - ''' - - retval = self._runtime[0].dump_fn(Runtime._runtime[0]) - if retval == -1: - _raise_driver_error("runtime dump_fn", -1) - return retval - -########################################################################## -# Public API -########################################################################## - -#------- Global Data - -runtime = Runtime() -has_cpu_device = runtime.has_cpu_device() -has_gpu_device = runtime.has_gpu_device() - -#------- Global Functions - -def enqueue_kernel (device_env, kernel, kernelargs, global_work_size, - local_work_size): - ''' A single wrapper function over OpenCL clCreateKernelArgs and - clEnqueueNDRangeKernel. The function blocks till the enqueued kernel - finishes execution. 
- ''' - - l_work_size_array = None - kernel_arg_array = ffi.new("kernel_arg_t [" + str(len(kernelargs)) + "]") - g_work_size_array = ffi.new("size_t [" + str(len(global_work_size)) + "]") - if local_work_size: - l_work_size_array = ffi.new( - "size_t [" + str(len(local_work_size)) + "]") - else: - l_work_size_array = ffi.NULL - for i in range(len(kernelargs)): - kernel_arg_array[i] = kernelargs[i].get_kernel_arg_obj() - for i in range(len(global_work_size)): - g_work_size_array[i] = global_work_size[i] - for i in range(len(local_work_size)): - l_work_size_array[i] = local_work_size[i] - retval = (lib.set_args_and_enqueue_dp_kernel(device_env.get_env_ptr(), - kernel.get_kernel_t_obj(), - len(kernelargs), - kernel_arg_array, - len(global_work_size), - ffi.NULL, - g_work_size_array, - l_work_size_array)) - if(retval): - _raise_driver_error("set_args_and_enqueue_dp_kernel", -1) - - -def is_available(): - ''' Return a Boolean to indicate the availability of a DPPL device. - ''' - - return runtime.has_cpu_device() or runtime.has_gpu_device() - - -def dppl_error(): - ''' Raised a DpplDriverError exception. - ''' - - _raise_driver_error() - - -########################################################################## -# Context Managers -########################################################################## - - -@contextmanager -def igpu_context(*args, **kwds): - ''' A context manager sets the current DeviceEnv inside the global - runtime object to the default GPU DeviceEnv. The GPU DeviceEnv is - yielded by the context manager. - ''' - - device_id = 0 - # some validation code - if(args): - assert(len(args) == 1 and args[0] == 0) - _logger.debug("Set the current env to igpu device queue %s", device_id) - lib.set_curr_env(runtime.get_runtime_ptr(), - runtime.get_gpu_device().get_env_ptr()) - device_env = runtime.get_current_device() - yield device_env - - # After yield as the exit method - #TODO : one exit reset the current env to previous value - _logger.debug("Exit method called") - - -@contextmanager -def cpu_context(*args, **kwds): - ''' A context manager sets the current DeviceEnv inside the global - runtime object to the default CPU DeviceEnv. The CPU DeviceEnv is - yielded by the context manager. - ''' - - device_id = 0 - # some validation code - if(args): - assert(len(args) == 1 and args[0] == 0) - _logger.debug("Set the current env to cpu device queue %s", device_id) - lib.set_curr_env(runtime.get_runtime_ptr(), - runtime.get_cpu_device().get_env_ptr()) - device_env = runtime.get_current_device() - yield device_env - - # After yield as the exit method - _logger.debug("Exit method called") diff --git a/dpctl/opencl_core.py b/dpctl/opencl_core.py deleted file mode 100644 index 8b0a14e0c6..0000000000 --- a/dpctl/opencl_core.py +++ /dev/null @@ -1,79 +0,0 @@ -##===--------- opencl_core.py - dpctl.ocldrv interface -----*- Python -*---===## -## -## Data paraller Control (dpctl) -## -## Copyright 2020 Intel Corporation -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. 
-## -##===----------------------------------------------------------------------===## -### -### \file -### This file implements a CFFI interface for dppl_opencl_interface.h -### functions. -##===----------------------------------------------------------------------===## - -import os - -from cffi import FFI - - -ffi = FFI() - -dppl_opencl_interface_incldir = os.environ.get( - 'DPPL_OPENCL_INTERFACE_INCLDIR', None) -dppl_opencl_interface_libdir = os.environ.get( - 'DPPL_OPENCL_INTERFACE_LIBDIR', None) -opencl_libdir = os.environ.get('OpenCL_LIBDIR', None) - -if opencl_libdir is None: - raise ValueError("Abort! Set the OpenCL_LIBDIR envar to point to " - "an OpenCL ICD") - -if dppl_opencl_interface_libdir is None: - raise ValueError("Abort! Set the DPPL_OPENCL_INTERFACE_LIBDIR envar to " - "point to ibdplibdpglueglue.so") - -if dppl_opencl_interface_incldir is None: - raise ValueError("Abort! Set the DP_GLUE_INCLDIR envar to point to " - "dppl_opencl_interface.h") - -glue_h = ''.join(list(filter(lambda x: len(x) > 0 and x[0] != "#", - open(dppl_opencl_interface_incldir + - '/dppl_opencl_interface.h', 'r') - .readlines()))).replace('DPPL_API', '') - -# cdef() expects a single string declaring the C types, functions and -# globals needed to use the shared object. It must be in valid C syntax. -ffi.cdef(glue_h) - -ffi_lib_name = "dpctl._opencl_core" - -import sys -IS_WIN = sys.platform in ['win32', 'cygwin'] -del sys - -ffi.set_source( - ffi_lib_name, - """ - #include "dppl_opencl_interface.h" // the C header of the library - """, - include_dirs=[dppl_opencl_interface_incldir], - library_dirs=[dppl_opencl_interface_libdir, opencl_libdir], - extra_link_args=[] if IS_WIN else ['-Wl,-rpath=$ORIGIN'], - libraries=["DPPLOpenCLInterface", "OpenCL"], -) # library name, for the linker -del IS_WIN - -if __name__ == "__main__": - ffi.compile(verbose=True) diff --git a/dpctl/tests/__init__.py b/dpctl/tests/__init__.py index f04131d53a..a53980d17a 100644 --- a/dpctl/tests/__init__.py +++ b/dpctl/tests/__init__.py @@ -23,6 +23,7 @@ ##===----------------------------------------------------------------------===## from .test_dump_functions import * +from .test_sycl_device import * from .test_sycl_kernel_submit import * from .test_sycl_program import * from .test_sycl_queue import * diff --git a/dpctl/tests/test_dump_functions.py b/dpctl/tests/test_dump_functions.py index 52b41edace..07c4b2d09b 100644 --- a/dpctl/tests/test_dump_functions.py +++ b/dpctl/tests/test_dump_functions.py @@ -25,29 +25,31 @@ import unittest import dpctl -import dpctl.ocldrv as drv class TestDumpMethods(unittest.TestCase): - - def test_dpctl_dump (self): + def test_dpctl_dump(self): try: dpctl.dump() except Exception: self.fail("Encountered an exception inside dump().") - def test_dpctl_dump_device_info (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_dpctl_dump_device_info(self): q = dpctl.get_current_queue() try: q.get_sycl_device().dump_device_info() except Exception: self.fail("Encountered an exception inside dump_device_info().") - def test_dpctl_ocldrv_dump (self): - try: - dpctl.ocldrv.runtime.dump() - except Exception: - self.fail("Encountered an exception inside dump_device_info().") + # def test_dpctl_ocldrv_dump (self): + # try: + # dpctl.ocldrv.runtime.dump() + # except Exception: + # self.fail("Encountered an exception inside dump_device_info().") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py new file mode 100644 index 0000000000..e222a55542 --- /dev/null +++ b/dpctl/tests/test_sycl_device.py @@ -0,0 +1,110 @@ +##===------------- test_sycl_device.py - dpctl -------*- Python -*---------===## +## +## Data Parallel Control (dpctl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## +## +## \file +## Defines unit test cases for the SyclDevice classes defined in sycl_core.pyx. +##===----------------------------------------------------------------------===## + +import dpctl +import unittest + + +@unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") +class TestSyclDevice(unittest.TestCase): + def test_get_max_compute_units(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_compute_units = q.get_sycl_device().get_max_compute_units() + except Exception: + self.fail("Encountered an exception inside get_max_compute_units().") + self.assertTrue(max_compute_units > 0) + + def test_get_max_work_item_dims(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_item_dims = q.get_sycl_device().get_max_work_item_dims() + except Exception: + self.fail("Encountered an exception inside get_max_work_item_dims().") + self.assertTrue(max_work_item_dims > 0) + + def test_get_max_work_item_sizes(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_item_sizes = q.get_sycl_device().get_max_work_item_sizes() + except Exception: + self.fail("Encountered an exception inside get_max_work_item_sizes().") + self.assertNotEqual(max_work_item_sizes, (None, None, None)) + + def test_get_max_work_group_size(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_group_size = q.get_sycl_device().get_max_work_group_size() + except Exception: + self.fail("Encountered an exception inside get_max_work_group_size().") + self.assertTrue(max_work_group_size > 0) + + def test_get_max_num_sub_groups(self): + try: + q 
= dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_num_sub_groups = q.get_sycl_device().get_max_num_sub_groups() + except Exception: + self.fail("Encountered an exception inside get_max_num_sub_groups().") + self.assertTrue(max_num_sub_groups > 0) + + def test_has_int64_base_atomics(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + aspects_base_atomics = q.get_sycl_device().has_int64_base_atomics() + except Exception: + self.fail("Encountered an exception inside has_int64_base_atomics().") + self.assertNotEqual(aspects_base_atomics, False) + + def test_has_int64_extended_atomics(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + aspects_extended_atomics = q.get_sycl_device().has_int64_extended_atomics() + except Exception: + self.fail("Encountered an exception inside has_int64_extended_atomics().") + self.assertNotEqual(aspects_extended_atomics, False) + + +if __name__ == "__main__": + unittest.main() diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index fd07de1fee..772eb15042 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -28,10 +28,10 @@ import dpctl._memory as dpctl_mem import numpy as np -@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class Test1DKernelSubmit (unittest.TestCase): - def test_create_program_from_source (self): +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") +class Test1DKernelSubmit(unittest.TestCase): + def test_create_program_from_source(self): oclSrc = " \ kernel void axpy(global int* a, global int* b, global int* c, int d) { \ size_t index = get_global_id(0); \ @@ -40,14 +40,14 @@ def test_create_program_from_source (self): with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() prog = dpctl.create_program_from_source(q, oclSrc) - axpyKernel = prog.get_sycl_kernel('axpy') + axpyKernel = prog.get_sycl_kernel("axpy") - abuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - bbuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - cbuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - a = np.ndarray((1024), buffer=abuf, dtype='i') - b = np.ndarray((1024), buffer=bbuf, dtype='i') - c = np.ndarray((1024), buffer=cbuf, dtype='i') + abuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + bbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + cbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + a = np.ndarray((1024), buffer=abuf, dtype="i") + b = np.ndarray((1024), buffer=bbuf, dtype="i") + c = np.ndarray((1024), buffer=cbuf, dtype="i") a[:] = np.arange(1024) b[:] = np.arange(1024, 0, -1) c[:] = 0 @@ -62,8 +62,8 @@ def test_create_program_from_source (self): r = [1024] q.submit(axpyKernel, args, r) - self.assertTrue(np.allclose(c, a*d + b)) + self.assertTrue(np.allclose(c, a * d + b)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index c59ea631ed..0e695bf6bb 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -27,10 +27,10 @@ import unittest import os -@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class 
TestProgramFromOCLSource (unittest.TestCase): - def test_create_program_from_source (self): +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") +class TestProgramFromOCLSource(unittest.TestCase): + def test_create_program_from_source(self): oclSrc = " \ kernel void add(global int* a, global int* b, global int* c) { \ size_t index = get_global_id(0); \ @@ -48,62 +48,59 @@ def test_create_program_from_source (self): self.assertTrue(prog.has_sycl_kernel("add")) self.assertTrue(prog.has_sycl_kernel("axpy")) - addKernel = prog.get_sycl_kernel('add') - axpyKernel = prog.get_sycl_kernel('axpy') + addKernel = prog.get_sycl_kernel("add") + axpyKernel = prog.get_sycl_kernel("axpy") - self.assertEqual(addKernel.get_function_name(),"add") - self.assertEqual(axpyKernel.get_function_name(),"axpy") + self.assertEqual(addKernel.get_function_name(), "add") + self.assertEqual(axpyKernel.get_function_name(), "axpy") self.assertEqual(addKernel.get_num_args(), 3) self.assertEqual(axpyKernel.get_num_args(), 4) @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class TestProgramFromSPRIV (unittest.TestCase): - +class TestProgramFromSPRIV(unittest.TestCase): def test_create_program_from_spirv(self): CURR_DIR = os.path.dirname(os.path.abspath(__file__)) - spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') - with open(spirv_file, 'rb') as fin: + spirv_file = os.path.join(CURR_DIR, "input_files/multi_kernel.spv") + with open(spirv_file, "rb") as fin: spirv = fin.read() with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() - prog = dpctl.create_program_from_spirv(q,spirv) + prog = dpctl.create_program_from_spirv(q, spirv) self.assertIsNotNone(prog) self.assertTrue(prog.has_sycl_kernel("add")) self.assertTrue(prog.has_sycl_kernel("axpy")) - addKernel = prog.get_sycl_kernel('add') - axpyKernel = prog.get_sycl_kernel('axpy') + addKernel = prog.get_sycl_kernel("add") + axpyKernel = prog.get_sycl_kernel("axpy") - self.assertEqual(addKernel.get_function_name(),"add") - self.assertEqual(axpyKernel.get_function_name(),"axpy") + self.assertEqual(addKernel.get_function_name(), "add") + self.assertEqual(axpyKernel.get_function_name(), "axpy") self.assertEqual(addKernel.get_num_args(), 3) self.assertEqual(axpyKernel.get_num_args(), 4) + @unittest.skipUnless( dpctl.has_gpu_queues(backend_ty=dpctl.backend_type.level_zero), - "No Level0 GPU queues available" + "No Level0 GPU queues available", ) -class TestProgramForLevel0GPU (unittest.TestCase): - +class TestProgramForLevel0GPU(unittest.TestCase): def test_create_program_from_spirv(self): CURR_DIR = os.path.dirname(os.path.abspath(__file__)) - spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') - with open(spirv_file, 'rb') as fin: + spirv_file = os.path.join(CURR_DIR, "input_files/multi_kernel.spv") + with open(spirv_file, "rb") as fin: spirv = fin.read() with dpctl.device_context("level0:gpu:0"): q = dpctl.get_current_queue() try: - prog = dpctl.create_program_from_spirv(q,spirv) - self.fail( - "Tried to create program for an unsupported Level0 GPU." 
- ) + prog = dpctl.create_program_from_spirv(q, spirv) + self.fail("Tried to create program for an unsupported Level0 GPU.") except ValueError: pass - def test_create_program_from_source (self): + def test_create_program_from_source(self): oclSrc = " \ kernel void add(global int* a, global int* b, global int* c) { \ size_t index = get_global_id(0); \ @@ -117,12 +114,10 @@ def test_create_program_from_source (self): q = dpctl.get_current_queue() try: prog = dpctl.create_program_from_source(q, oclSrc) - self.fail( - "Tried to create program for an unsupported Level0 GPU." - ) + self.fail("Tried to create program for an unsupported Level0 GPU.") except ValueError: pass -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index 92a0dafab4..317685c9f3 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -25,25 +25,21 @@ import dpctl import unittest -class TestSyclQueue (unittest.TestCase): - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_queue_not_equals (self): + +class TestSyclQueue(unittest.TestCase): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_queue_not_equals(self): with dpctl.device_context("opencl:gpu") as gpuQ0: with dpctl.device_context("opencl:cpu") as cpuQ: self.assertFalse(cpuQ.equals(gpuQ0)) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_queue_equals (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_queue_equals(self): with dpctl.device_context("opencl:gpu") as gpuQ0: with dpctl.device_context("opencl:gpu") as gpuQ1: self.assertTrue(gpuQ0.equals(gpuQ1)) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue_manager.py b/dpctl/tests/test_sycl_queue_manager.py index 0427daf73f..c4b0408173 100644 --- a/dpctl/tests/test_sycl_queue_manager.py +++ b/dpctl/tests/test_sycl_queue_manager.py @@ -26,23 +26,22 @@ import unittest -class TestGetNumPlatforms (unittest.TestCase): - @unittest.skipIf(not dpctl.has_sycl_platforms(), - "No SYCL platforms available") - def test_dpctl_get_num_platforms (self): - if(dpctl.has_sycl_platforms): +class TestGetNumPlatforms(unittest.TestCase): + @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") + def test_dpctl_get_num_platforms(self): + if dpctl.has_sycl_platforms: self.assertGreaterEqual(dpctl.get_num_platforms(), 1) @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestDumpMethods (unittest.TestCase): - def test_dpctl_dump (self): +class TestDumpMethods(unittest.TestCase): + def test_dpctl_dump(self): try: dpctl.dump() except Exception: self.fail("Encountered an exception inside dump().") - def test_dpctl_dump_device_info (self): + def test_dpctl_dump_device_info(self): q = dpctl.get_current_queue() try: q.get_sycl_device().dump_device_info() @@ -51,25 +50,18 @@ def test_dpctl_dump_device_info (self): @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestIsInDeviceContext (unittest.TestCase): - - def test_is_in_device_context_outside_device_ctxt (self): +class TestIsInDeviceContext(unittest.TestCase): + def 
test_is_in_device_context_outside_device_ctxt(self): self.assertFalse(dpctl.is_in_device_context()) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_is_in_device_context_inside_device_ctxt (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_is_in_device_context_inside_device_ctxt(self): with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_is_in_device_context_inside_nested_device_ctxt (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_is_in_device_context_inside_nested_device_ctxt(self): with dpctl.device_context("opencl:cpu:0"): with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) @@ -78,72 +70,52 @@ def test_is_in_device_context_inside_nested_device_ctxt (self): @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestIsInDeviceContext (unittest.TestCase): - - def test_get_current_device_type_outside_device_ctxt (self): +class TestGetCurrentDevice(unittest.TestCase): + def test_get_current_device_type_outside_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) - def test_get_current_device_type_inside_device_ctxt (self): + def test_get_current_device_type_inside_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.gpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) self.assertNotEqual(dpctl.get_current_device_type(), None) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_get_current_device_type_inside_nested_device_ctxt (self): + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_get_current_device_type_inside_nested_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:cpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.cpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.gpu - ) - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.cpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) self.assertNotEqual(dpctl.get_current_device_type(), None) @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestGetCurrentQueueInMultipleThreads (unittest.TestCase): - - def test_num_current_queues_outside_with_clause (self): +class TestGetCurrentQueueInMultipleThreads(unittest.TestCase): + def test_num_current_queues_outside_with_clause(self): self.assertEqual(dpctl.get_num_activated_queues(), 0) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_num_current_queues_inside_with_clause (self): + 
@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_num_current_queues_inside_with_clause(self): with dpctl.device_context("opencl:cpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 2) self.assertEqual(dpctl.get_num_activated_queues(), 0) + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_num_current_queues_inside_threads(self): + from threading import Thread - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_num_current_queues_inside_threads (self): - from threading import Thread, local - def SessionThread (self): + def SessionThread(self): self.assertEqual(dpctl.get_num_activated_queues(), 0) with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) @@ -156,5 +128,5 @@ def SessionThread (self): Session2.start() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index 6cb95ea63e..ad4cdaf92d 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -26,16 +26,17 @@ import unittest - -class TestQueueMemcpy (unittest.TestCase): - - def _create_memory (self): +class TestQueueMemcpy(unittest.TestCase): + def _create_memory(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = dpctl._memory.MemoryUSMShared(nbytes, queue) return mobj - def test_memcpy_copy_usm_to_usm (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memcpy_copy_usm_to_usm(self): mobj1 = self._create_memory() mobj2 = self._create_memory() q = dpctl.get_current_queue() @@ -43,13 +44,16 @@ def test_memcpy_copy_usm_to_usm (self): mv1 = memoryview(mobj1) mv2 = memoryview(mobj2) - mv1[:3] = b'123' + mv1[:3] = b"123" q.memcpy(mobj2, mobj1, 3) - self.assertEqual(mv2[:3], b'123') + self.assertEqual(mv2[:3], b"123") - def test_memcpy_type_error (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memcpy_type_error(self): mobj = self._create_memory() q = dpctl.get_current_queue() @@ -66,5 +70,5 @@ def test_memcpy_type_error (self): self.assertEqual(str(cm.exception), "Parameter src should be Memory.") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 6a1d5ddc2f..6f938028ce 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -27,30 +27,33 @@ from dpctl._memory import MemoryUSMShared, MemoryUSMHost, MemoryUSMDevice -class TestMemory (unittest.TestCase): - - def test_memory_create (self): +class TestMemory(unittest.TestCase): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_memory_create(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = MemoryUSMShared(nbytes, queue) self.assertEqual(mobj.nbytes, nbytes) - def _create_memory (self): + def _create_memory(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = MemoryUSMShared(nbytes, queue) return mobj - def test_memory_without_context (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memory_without_context(self): mobj = self._create_memory() # Without context - self.assertEqual(mobj._usm_type(), 'shared') + self.assertEqual(mobj._usm_type(), "shared") - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_memory_cpu_context (self): + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_memory_cpu_context(self): mobj = self._create_memory() # CPU context @@ -58,31 +61,31 @@ def test_memory_cpu_context (self): # type respective to the context in which # memory was created usm_type = mobj._usm_type() - self.assertEqual(usm_type, 'shared') + self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() # type as view from current queue usm_type = mobj._usm_type(current_queue) # type can be unknown if current queue is # not in the same SYCL context - self.assertTrue(usm_type in ['unknown', 'shared']) + self.assertTrue(usm_type in ["unknown", "shared"]) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_memory_gpu_context (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_memory_gpu_context(self): mobj = self._create_memory() # GPU context with dpctl.device_context("opencl:gpu:0"): usm_type = mobj._usm_type() - self.assertEqual(usm_type, 'shared') + self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() usm_type = mobj._usm_type(current_queue) - self.assertTrue(usm_type in ['unknown', 'shared']) - + self.assertTrue(usm_type in ["unknown", "shared"]) - def test_buffer_protocol (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_buffer_protocol(self): mobj = self._create_memory() mv1 = memoryview(mobj) mv2 = memoryview(mobj) @@ -95,13 +98,19 @@ class TestMemoryUSMBase: MemoryUSMClass = None usm_type = None - def test_create_with_queue (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_create_with_queue(self): q = dpctl.get_current_queue() m = self.MemoryUSMClass(1024, q) self.assertEqual(m.nbytes, 1024) self.assertEqual(m._usm_type(), self.usm_type) - def test_create_without_queue (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_create_without_queue(self): m = self.MemoryUSMClass(1024) self.assertEqual(m.nbytes, 1024) self.assertEqual(m._usm_type(), self.usm_type) @@ -111,22 +120,22 @@ class TestMemoryUSMShared(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMShared """ MemoryUSMClass = MemoryUSMShared - usm_type = 'shared' + usm_type = "shared" class TestMemoryUSMHost(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMHost """ MemoryUSMClass = MemoryUSMHost - usm_type = 'host' + usm_type = "host" class TestMemoryUSMDevice(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMDevice """ MemoryUSMClass = MemoryUSMDevice - usm_type = 'device' + usm_type = "device" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..2b233b61c9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +exclude = 'versioneer.py' diff --git a/scripts/build_for_develop.sh b/scripts/build_for_develop.sh index 3d54402846..5144312eee 100755 --- a/scripts/build_for_develop.sh +++ b/scripts/build_for_develop.sh @@ -34,10 +34,6 @@ cp install/lib/*.so dpctl/ mkdir -p dpctl/include cp -r backends/include/* dpctl/include -export DPPL_OPENCL_INTERFACE_LIBDIR=dpctl -export DPPL_OPENCL_INTERFACE_INCLDIR=dpctl/include -# /usr/lib/x86_64-linux-gnu/ -export OpenCL_LIBDIR=${DPCPP_ROOT}/lib export DPPL_SYCL_INTERFACE_LIBDIR=dpctl export DPPL_SYCL_INTERFACE_INCLDIR=dpctl/include diff --git a/setup.py b/setup.py index 51551e099d..d4634e700a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ ##===---------- setup.py - dpctl.ocldrv interface -----*- Python -*-----===## ## -## Python Data Parallel Processing Library (PyDPPL) +## Data Parallel Control Library (dpCtl) ## ## Copyright 2020 Intel Corporation ## @@ -32,51 +32,61 @@ import numpy as np requirements = [ - 'cffi>=1.0.0', - 'cython', + "cython", ] IS_WIN = False IS_MAC = False IS_LIN = False -if 'linux' in sys.platform: +if "linux" in sys.platform: IS_LIN = True -elif sys.platform == 'darwin': +elif sys.platform == "darwin": IS_MAC = True -elif sys.platform in ['win32', 'cygwin']: +elif sys.platform in ["win32", "cygwin"]: IS_WIN = True else: - assert False, sys.platform + ' not supported' + assert False, sys.platform + " not supported" + +dppl_sycl_interface_lib = os.environ["DPPL_SYCL_INTERFACE_LIBDIR"] +dppl_sycl_interface_include = os.environ["DPPL_SYCL_INTERFACE_INCLDIR"] +sycl_lib = os.environ["ONEAPI_ROOT"] + "\compiler\latest\windows\lib" -dppl_sycl_interface_lib = os.environ['DPPL_SYCL_INTERFACE_LIBDIR'] -dppl_sycl_interface_include = os.environ['DPPL_SYCL_INTERFACE_INCLDIR'] -sycl_lib = os.environ['ONEAPI_ROOT']+"\compiler\latest\windows\lib" def get_sdl_cflags(): if IS_LIN or IS_MAC: - return ['-fstack-protector', '-fPIC', - '-D_FORTIFY_SOURCE=2', '-Wformat', '-Wformat-security',] + return [ + "-fstack-protector", + "-fPIC", + "-D_FORTIFY_SOURCE=2", + "-Wformat", + "-Wformat-security", + ] elif IS_WIN: return [] + def get_sdl_ldflags(): if IS_LIN: - return ['-Wl,-z,noexecstack,-z,relro,-z,now',] + return [ + "-Wl,-z,noexecstack,-z,relro,-z,now", + ] elif IS_MAC: return [] elif IS_WIN: - return ['/NXCompat', '/DynamicBase'] + return ["/NXCompat", "/DynamicBase"] + def get_other_cxxflags(): if IS_LIN: - return ['-O3', '-fsycl', '-std=c++17'] + return ["-O3", "-fsycl", "-std=c++17"] elif IS_MAC: return [] elif IS_WIN: # FIXME: These are specific to MSVC and we should first make sure # what compiler we are using. 
- return ['/Ox', '/std:c++17'] + return ["/Ox", "/std:c++17"] + def extensions(): # Security flags @@ -86,11 +96,11 @@ def extensions(): librarys = [] if IS_LIN: - libs += ['rt', 'DPPLSyclInterface'] + libs += ["rt", "DPPLSyclInterface"] elif IS_MAC: pass elif IS_WIN: - libs += ['DPPLSyclInterface', 'sycl'] + libs += ["DPPLSyclInterface", "sycl"] if IS_LIN: librarys = [dppl_sycl_interface_lib] @@ -105,47 +115,57 @@ def extensions(): runtime_library_dirs = [] extension_args = { - "depends": [dppl_sycl_interface_include,], + "depends": [ + dppl_sycl_interface_include, + ], "include_dirs": [np.get_include(), dppl_sycl_interface_include], "extra_compile_args": eca + get_other_cxxflags(), "extra_link_args": ela, "libraries": libs, "library_dirs": librarys, "runtime_library_dirs": runtime_library_dirs, - "language": 'c++', + "language": "c++", } extensions = [ - Extension('dpctl._sycl_core', [os.path.join('dpctl', 'sycl_core.pyx'),], - **extension_args), - Extension('dpctl._memory', [os.path.join('dpctl', '_memory.pyx'),], - **extension_args), + Extension( + "dpctl._sycl_core", + [ + os.path.join("dpctl", "_sycl_core.pyx"), + ], + **extension_args + ), + Extension( + "dpctl._memory", + [ + os.path.join("dpctl", "_memory.pyx"), + ], + **extension_args + ), ] exts = cythonize(extensions) return exts + setup( - name='dpctl', + name="dpctl", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description="A lightweight Python wrapper for a subset of OpenCL and SYCL.", license="Apache 2.0", author="Intel Corporation", - url='https://github.com/IntelPython/dpCtl', + url="https://github.com/IntelPython/dpCtl", packages=find_packages(include=["*"]), include_package_data=True, - ext_modules = extensions(), + ext_modules=extensions(), setup_requires=requirements, - cffi_modules=[ - "./dpctl/opencl_core.py:ffi" - ], install_requires=requirements, - keywords='dpctl', + keywords="dpctl", classifiers=[ "Development Status :: 3 - Alpha", - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ] + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], )
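
Note: the new `dpctl/tests/test_sycl_device.py` introduced above exercises the device-descriptor accessors on the object returned by `get_sycl_device()`. As a rough usage sketch outside the unittest harness (assuming a dpctl build from this changeset and at least one available SYCL platform), the same queries can be made directly; only names that appear in the tests above are used here.

```python
# Minimal sketch of the device queries covered by test_sycl_device.py.
# Assumes a dpctl build from this changeset and at least one SYCL platform.
import dpctl

if dpctl.has_sycl_platforms():
    q = dpctl.get_current_queue()
    dev = q.get_sycl_device()

    # Compute-unit and work-group limits checked by the new tests
    print("max_compute_units:   ", dev.get_max_compute_units())
    print("max_work_item_dims:  ", dev.get_max_work_item_dims())
    print("max_work_item_sizes: ", dev.get_max_work_item_sizes())
    print("max_work_group_size: ", dev.get_max_work_group_size())
    print("max_num_sub_groups:  ", dev.get_max_num_sub_groups())

    # int64 atomics support (aspects)
    print("int64 base atomics:    ", dev.has_int64_base_atomics())
    print("int64 extended atomics:", dev.has_int64_extended_atomics())
else:
    print("No SYCL platforms available")
```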