diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..1381b1e63d --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# $ git config blame.ignoreRevsFile .git-blame-ignore-revs + +# Migrate code style to Black +41ccd65e2e659aa0add0e5ab59f1a46e32cc4c46 diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml new file mode 100644 index 0000000000..be581fd2f3 --- /dev/null +++ b/.github/workflows/black.yml @@ -0,0 +1,28 @@ +# This is a workflow to format Python code with the black formatter + +name: black + +# Controls when the action will run. Triggers the workflow on push or pull request +# events but only for the master branch +on: + push: + branches: [master] + pull_request: + branches: [master] + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "black" + black: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + # Set up a Python environment for use in actions + - uses: actions/setup-python@v2 + + # Run the black code formatter + - uses: psf/black@stable diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..e15d41f23c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,75 @@ +# Changelog +All notable changes to this project will be documented in this file. + +## [Unreleased] +### Added +- Device descriptors "max_compute_units", "max_work_item_dimensions", "max_work_item_sizes", "max_work_group_size", "max_num_sub_groups" and "aspects" for int64 atomics inside the dpctl C API and inside the dpctl.SyclDevice class. + +### Removed +- The legacy OpenCL interface. + +## [0.3.6] - 2020-10-06 +### Added +- Changelog was added for dpctl. + +### Fixed +- Windows build was fixed. + +## [0.3.5] - 2020-10-06 +### Added +- Add a helper function to all Python SyclXXX classes to get the address of the base C API pointer as a long. + +### Changed +- Rename PyDPPL to dpCtl in comments (function name renaming to come later). + +### Fixed +- Fix bugs highlighted by tools. +- Various code cleanups. + +## [0.3.4] - 2020-10-05 +### Added +- Dump functions were enhanced to print back-end information. +- dpctl gained support for uint8 and unsigned long data types. +- oneAPI Beta 10 toolchain support was added. + +### Changed +- dpctl is now aware of DPC++ Sycl PI back-ends. The functionality is now exposed via the context interface. +- C API's queue manager was refactored to require a back-end. +- dpctl's device_context now requires back-end, device-type, and device-id to be provided in a string format, e.g. opencl:gpu:0. + +### Fixed +- Fixed some important bugs found by static analysis. + +## [0.3.3] - 2020-10-02 +### Added +- Add dpctl.get_current_device_type(). + +## [0.3.2] - 2020-09-29 +### Changed +- Set _cpu_device and _gpu_device to None by default. + +## [0.3.1] - 2020-09-28 +### Added +- Add get_include() and include headers. + +### Changed +- DPPL shared objects are installed into dpctl. + +### Fixed +- Refactor unit tests. + +## [0.3.0] - 2020-09-23 +### Added +- Add C and Cython APIs for portions of the Sycl queue, device, and context interfaces. +- Implement USM memory management. + +### Changed +- Refactor the API to expose a minimal sycl::queue interface.
+- Convert cpu_queues, gpu_queues, and active_queues into functions. +- Change static vectors to static pointers to vectors; this avoids calling the destructors, which were otherwise invoked in an undefined order. +- Rename package PyDPPL to dpCtl. +- Use dpcpp.exe on Windows instead of dpcpp-cl.exe, which was removed in oneAPI beta08. + +### Fixed +- Correct the use of ERRORLEVEL in conda scripts for Windows. +- Fix using dppl.has_sycl_platforms() and dppl.has_gpu_queues() functions in skipIf. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ee9a161273..c3076bd5e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,10 @@ -Diptorup Deb +# Python code style -Todd Anderson \ No newline at end of file +## black + +We use the [black](https://black.readthedocs.io/en/stable/) code formatter. + +- Revision: `20.8b1` or branch `stable`. +- See configuration in `pyproject.toml`. + +Run it before each commit: `black .` diff --git a/README.md b/README.md index 1891b80c33..fd54be2f92 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + What? ==== -A lightweight Python package exposing a subset of OpenCL and SYCL -functionalities. +A lightweight Python package exposing a subset of SYCL functionalities. Requirements ============ @@ -27,14 +28,17 @@ On Windows to cope with [long file names](https://github.com/IntelPython/dpctl/i ```cmd conda build --croot=C:/tmp conda-recipe ``` + +:warning: **You may face issues with conda-build=3.20**: use conda-build=3.18! + 3. Install conda package ```bash conda install dpctl ``` -Using PyDPPL -============ -PyDPPL relies on SYCL runtime. With Intel oneAPI installed you should activate it. +Using dpCtl +=========== +dpCtl relies on the DPC++ runtime. With Intel oneAPI installed you should activate it.
On Windows: ```cmd diff --git a/backends/CMakeLists.txt b/backends/CMakeLists.txt index 373f0c8f2b..a8d02185f2 100644 --- a/backends/CMakeLists.txt +++ b/backends/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.3.2 FATAL_ERROR) -project("PyDPPL - A lightweight SYCL wrapper for Python") +project("dpCtl - A lightweight SYCL wrapper for Python") # The function checks is DPCPP_ROOT is valid and points to a dpcpp installation function (check_for_dpcpp) @@ -76,23 +76,14 @@ if(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qstd=c++17") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -DDEBUG ") elseif(UNIX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} \ - -Wall -Wextra -Winit-self -Wuninitialized -Wmissing-declarations \ - -fdiagnostics-color=auto -O3 \ - ") + set(SDL_FLAGS "-fstack-protector -fstack-protector-all -fpic -fPIC -D_FORTIFY_SOURCE=2 -Wformat -Wformat-security -fno-strict-overflow -fno-delete-null-pointer-checks") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SDL_FLAGS} -Wall -Wextra -Winit-self -Wuninitialized -Wmissing-declarations -fdiagnostics-color=auto") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -ggdb3 -DDEBUG ") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -fsycl") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SDL_FLAGS} -std=c++17 -fsycl") else() message(FATAL_ERROR "Unsupported system.") endif() - -set(OpenCL_INCLUDE_DIR "${DPCPP_ROOT}/include/sycl") -set(OpenCL_LIBRARY "${DPCPP_ROOT}/lib/libOpenCL.so") - -message(STATUS "OpenCL_INCLUDE_DIR: ${OpenCL_INCLUDE_DIR}") -message(STATUS "OpenCL_LIBRARY: ${OpenCL_LIBRARY}") - add_library( DPPLSyclInterface SHARED @@ -108,13 +99,6 @@ add_library( source/dppl_utils.cpp ) -# Install DPPLOpenCLInterface -add_library( - DPPLOpenCLInterface - SHARED - source/dppl_opencl_interface.c -) - # Install DPPLSyclInterface target_include_directories( DPPLSyclInterface @@ -124,18 +108,6 @@ target_include_directories( ${NUMPY_INCLUDE_DIR} ) -target_include_directories( - DPPLOpenCLInterface - PRIVATE - ${CMAKE_SOURCE_DIR}/include/ -) - -target_include_directories( - DPPLOpenCLInterface - PUBLIC - ${OpenCL_INCLUDE_DIR} -) - if(WIN32) message( STATUS @@ -152,10 +124,6 @@ if(WIN32) PRIVATE ${DPCPP_ROOT}/lib/sycl.lib PRIVATE ${DPCPP_ROOT}/lib/OpenCL.lib ) - target_link_libraries( - DPPLOpenCLInterface - PRIVATE ${DPCPP_ROOT}/lib/OpenCL.lib - ) endif() install( @@ -165,14 +133,6 @@ install( "${CMAKE_INSTALL_PREFIX}/lib/" ) -install( - TARGETS - DPPLOpenCLInterface - LIBRARY - DESTINATION - "${CMAKE_INSTALL_PREFIX}/lib/" -) - # Install all headers file(GLOB HEADERS "${CMAKE_SOURCE_DIR}/include/*.h*") foreach(HEADER ${HEADERS}) @@ -185,7 +145,7 @@ foreach(HEADER ${HEADERS}) install(FILES "${HEADER}" DESTINATION include/Support) endforeach() -# Enable to build the PyDPPL backend test cases +# Enable to build the dpCtl backend test cases add_subdirectory(tests) # Todo : Add build rules for doxygen diff --git a/backends/include/Support/CBindingWrapping.h b/backends/include/Support/CBindingWrapping.h index 40ef4027b2..4c77612269 100644 --- a/backends/include/Support/CBindingWrapping.h +++ b/backends/include/Support/CBindingWrapping.h @@ -1,6 +1,6 @@ //===----- Support/CBindingWrapping.h - DPPL-SYCL interface --*-- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/DllExport.h b/backends/include/Support/DllExport.h index 28643f3195..de82311320 100644 --- a/backends/include/Support/DllExport.h +++ 
b/backends/include/Support/DllExport.h @@ -1,6 +1,6 @@ //===---------- Support/DllExport.h - DPPL-SYCL interface ---*--- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/ExternC.h b/backends/include/Support/ExternC.h index 60b91de0dd..76baef7871 100644 --- a/backends/include/Support/ExternC.h +++ b/backends/include/Support/ExternC.h @@ -1,6 +1,6 @@ //===------------ Support/ExternC.h - DPPL-SYCL interface ---*--- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/Support/MemOwnershipAttrs.h b/backends/include/Support/MemOwnershipAttrs.h index 6f340036ac..f0579fdd46 100644 --- a/backends/include/Support/MemOwnershipAttrs.h +++ b/backends/include/Support/MemOwnershipAttrs.h @@ -1,6 +1,6 @@ //===----- dppl_mem_ownership_attrs.h - DPPL-SYCL interface --*-- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_data_types.h b/backends/include/dppl_data_types.h index 3da14eb5b5..4427b3f893 100644 --- a/backends/include/dppl_data_types.h +++ b/backends/include/dppl_data_types.h @@ -1,6 +1,6 @@ -//===---------- dppl_data_types.h - DPPL-SYCL interface ----*---- C ---*---===// +//===------------------ dppl_data_types.h - dpctl-C_API ----*---- C ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_opencl_interface.h b/backends/include/dppl_opencl_interface.h deleted file mode 100644 index 52e03ed7de..0000000000 --- a/backends/include/dppl_opencl_interface.h +++ /dev/null @@ -1,312 +0,0 @@ -//===-- dppl_opencl_interface.h - DPPL-OpenCL interface -------*- C -*-----===// -// -// Python Data Parallel Processing Library (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the declaration of a C API to expose a lightweight OpenCL -/// interface for the Python dpctl package. -/// -//===----------------------------------------------------------------------===// - -#pragma once - -#include -#include - - -#ifdef _WIN32 -# ifdef DPPLOpenCLInterface_EXPORTS -# define DPPL_API __declspec(dllexport) -# else -# define DPPL_API __declspec(dllimport) -# endif -#else -# define DPPL_API -#endif - - -enum DP_GLUE_ERROR_CODES -{ - DP_GLUE_SUCCESS = 0, - DP_GLUE_FAILURE = -1 -}; - - -/*! 
- * - */ -struct dp_env -{ - unsigned id_; - // TODO : Add members to store more device related information such as name - void *context; - void *device; - void *queue; - unsigned int max_work_item_dims; - size_t max_work_group_size; - int support_int64_atomics; - int support_float64_atomics; - int (*dump_fn) (void *); -}; - -typedef struct dp_env* env_t; - - -struct dp_buffer -{ - unsigned id_; - // This may, for example, be a cl_mem pointer - void *buffer_ptr; - // Stores the size of the buffer_ptr (e.g sizeof(cl_mem)) - size_t sizeof_buffer_ptr; -}; - -typedef struct dp_buffer* buffer_t; - - -struct dp_kernel -{ - unsigned id_; - void *kernel; - int (*dump_fn) (void *); -}; - -typedef struct dp_kernel* kernel_t; - - -struct dp_program -{ - unsigned id_; - void *program; -}; - -typedef struct dp_program* program_t; - - -struct dp_kernel_arg -{ - unsigned id_; - const void *arg_value; - size_t arg_size; -}; - -typedef struct dp_kernel_arg* kernel_arg_t; - - -/*! @struct dp_runtime_t - * @brief Stores an array of the available OpenCL or Level-0 platform/drivers. - * - * @var dp_runtime_t::num_platforms - * Depicts the number of platforms/drivers available on this system - * - * @var dp_runtime_t::platforms_ids - * An array of OpenCL platforms. - * - */ -struct dp_runtime -{ - unsigned id_; - unsigned num_platforms; - void *platform_ids; - bool has_cpu; - bool has_gpu; - env_t first_cpu_env; - env_t first_gpu_env; - env_t curr_env; - int (*dump_fn) (void *); -}; - -typedef struct dp_runtime* runtime_t; - -DPPL_API -int set_curr_env (runtime_t rt, env_t env); - -/*! - * @brief Initializes a new dp_runtime_t object - * - * @param[in/out] rt - An uninitialized runtime_t pointer that is initialized - * by the function. - * - * @return An error code indicating if the runtime_t object was successfully - * initialized. - */ -DPPL_API -int create_dp_runtime (runtime_t *rt); - - -/*! - * @brief Free the runtime and all its resources. - * - * @param[in] rt - Pointer to the numba_one_api_runtime_t object to be freed - * - * @return An error code indicating if resource freeing was successful. - */ -DPPL_API -int destroy_dp_runtime (runtime_t *rt); - - -/*! - * - */ -DPPL_API -int create_dp_rw_mem_buffer (env_t env_t_ptr, size_t buffsize, buffer_t *buff); - - -DPPL_API -int destroy_dp_rw_mem_buffer (buffer_t *buff); - - -/*! - * - */ -DPPL_API -int write_dp_mem_buffer_to_device (env_t env_t_ptr, - buffer_t buff, - bool blocking_copy, - size_t offset, - size_t buffersize, - const void *data_ptr); - - -/*! - * - */ -DPPL_API -int read_dp_mem_buffer_from_device (env_t env_t_ptr, - buffer_t buff, - bool blocking_copy, - size_t offset, - size_t buffersize, - void *data_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_program_from_spirv (env_t env_t_ptr, - const void *il, - size_t length, - program_t *program_t_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_program_from_source (env_t env_t_ptr, - unsigned int count, - const char **strings, - const size_t *lengths, - program_t *program_t_ptr); - -/*! - * - */ -DPPL_API -int destroy_dp_program (program_t *program_t_ptr); - - -DPPL_API -int build_dp_program (env_t env_t_ptr, program_t program_t_ptr); - -/*! - * - */ -DPPL_API -int create_dp_kernel (env_t env_t_ptr, - program_t program_ptr, - const char *kernel_name, - kernel_t *kernel_ptr); - - -DPPL_API -int destroy_dp_kernel (kernel_t *kernel_ptr); - - -/*! - * - */ -DPPL_API -int create_dp_kernel_arg (const void *arg_value, - size_t arg_size, - kernel_arg_t *kernel_arg_t_ptr); - - -/*! 
- * - */ -DPPL_API -int create_dp_kernel_arg_from_buffer (buffer_t *buffer_t_ptr, - kernel_arg_t *kernel_arg_t_ptr); - - -/*! - * - */ -DPPL_API -int destroy_dp_kernel_arg (kernel_arg_t *kernel_arg_t_ptr); - - -/*! - * - */ -DPPL_API -int set_args_and_enqueue_dp_kernel (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size); - - -/*! - * - */ -DPPL_API -int set_args_and_enqueue_dp_kernel_auto_blocking (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int num_dims, - size_t *dim_starts, - size_t *dim_stops); - - -/*! - * - */ -DPPL_API -int retain_dp_context (env_t env_t_ptr); - - -/*! - * - */ -DPPL_API -int release_dp_context (env_t env_t_ptr); - - -//---- TODO: - -// 1. Add release/retain methods for buffers - -//--------- diff --git a/backends/include/dppl_sycl_context_interface.h b/backends/include/dppl_sycl_context_interface.h index 39905f6267..bb0226225f 100644 --- a/backends/include/dppl_sycl_context_interface.h +++ b/backends/include/dppl_sycl_context_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_context_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===----------- dppl_sycl_context_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -57,13 +57,14 @@ DPPL_API bool DPPLContext_IsHost (__dppl_keep const DPPLSyclContextRef CtxRef); /*! - * @brief + * @brief Returns the sycl backend for the DPPLSyclContextRef pointer. * * @param CtxRef An opaque pointer to a sycl::context. - * @return {return} My Param doc + * @return The sycl backend for the DPPLSyclContextRef returned as + * a DPPLSyclBackendType enum type. */ DPPL_API -DPPLSyclBEType +DPPLSyclBackendType DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef); /*! diff --git a/backends/include/dppl_sycl_device_interface.h b/backends/include/dppl_sycl_device_interface.h index f1619ffcae..f677c642fc 100644 --- a/backends/include/dppl_sycl_device_interface.h +++ b/backends/include/dppl_sycl_device_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_device_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===---------- dppl_sycl_device_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -110,10 +110,10 @@ DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef); */ DPPL_API uint32_t -DPPLDevice_GetMaxComputeUnites (__dppl_keep const DPPLSyclDeviceRef DRef); +DPPLDevice_GetMaxComputeUnits (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). * * @param DRef Opaque pointer to a sycl::device * @return Returns the valid result if device exists else returns 0. @@ -123,17 +123,17 @@ uint32_t DPPLDevice_GetMaxWorkItemDims (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). * * @param DRef Opaque pointer to a sycl::device - * @return Returns the valid result if device exists else returns Null. + * @return Returns the valid result if device exists else returns NULL. */ DPPL_API __dppl_keep size_t* DPPLDevice_GetMaxWorkItemSizes (__dppl_keep const DPPLSyclDeviceRef DRef); /*! - * @brief Wrapper for get_info(). 
* * @param DRef Opaque pointer to a sycl::device * @return Returns the valid result if device exists else returns 0. @@ -152,6 +152,26 @@ DPPL_API uint32_t DPPLDevice_GetMaxNumSubGroups (__dppl_keep const DPPLSyclDeviceRef DRef); +/*! + * @brief Wrapper over device.get_info. + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns true if device has int64_base_atomics else returns false. + */ +DPPL_API +bool +DPPLDevice_HasInt64BaseAtomics (__dppl_keep const DPPLSyclDeviceRef DRef); + +/*! + * @brief Wrapper over device.get_info. + * + * @param DRef Opaque pointer to a sycl::device + * @return Returns true if device has int64_extended_atomics else returns false. + */ +DPPL_API +bool +DPPLDevice_HasInt64ExtendedAtomics (__dppl_keep const DPPLSyclDeviceRef DRef); + /*! * @brief Returns a C string for the device name. * diff --git a/backends/include/dppl_sycl_enum_types.h b/backends/include/dppl_sycl_enum_types.h index 2a787f995d..0d8d73f091 100644 --- a/backends/include/dppl_sycl_enum_types.h +++ b/backends/include/dppl_sycl_enum_types.h @@ -35,7 +35,7 @@ DPPL_C_EXTERN_C_BEGIN * @brief Redefinition of DPC++-specific Sycl backend types. * */ -enum DPPLSyclBEType +enum DPPLSyclBackendType { DPPL_UNKNOWN_BACKEND = 0x0, DPPL_OPENCL = 1 << 16, @@ -57,7 +57,7 @@ enum DPPLSyclDeviceType DPPL_AUTOMATIC = 1 << 4, DPPL_HOST_DEVICE = 1 << 5, DPPL_ALL = 1 << 6 - // IMP: before adding new values here look at DPPLSyclBEType enum. The + // IMP: before adding new values here look at DPPLSyclBackendType enum. The // values should not overlap. }; @@ -76,6 +76,7 @@ typedef enum DPPL_SHORT, DPPL_INT, DPPL_UNSIGNED_INT, + DPPL_UNSIGNED_INT8, DPPL_LONG, DPPL_UNSIGNED_LONG, DPPL_LONG_LONG, diff --git a/backends/include/dppl_sycl_event_interface.h b/backends/include/dppl_sycl_event_interface.h index c97eaf08f3..98bc9dae08 100644 --- a/backends/include/dppl_sycl_event_interface.h +++ b/backends/include/dppl_sycl_event_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_event_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===----------- dppl_sycl_event_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_kernel_interface.h b/backends/include/dppl_sycl_kernel_interface.h index 64d2f97a30..a48064cf83 100644 --- a/backends/include/dppl_sycl_kernel_interface.h +++ b/backends/include/dppl_sycl_kernel_interface.h @@ -1,6 +1,6 @@ -//===---- dppl_sycl_kernel_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===------------ dppl_sycl_kernel_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_platform_interface.h b/backends/include/dppl_sycl_platform_interface.h index 334bd08fcf..b3df1b5c56 100644 --- a/backends/include/dppl_sycl_platform_interface.h +++ b/backends/include/dppl_sycl_platform_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_platform_interface.h - DPPL-SYCL interface ---*--C++ -*-===// +//===----------- dppl_sycl_platform_interface.h - dpctl-C_API ---*--C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -34,37 +34,38 @@ DPPL_C_EXTERN_C_BEGIN /*! - * @brief Returns the number of sycl::platform available on the system. 
+ * @brief Returns the number of non-host type sycl::platform available on the + * system. * * @return The number of available sycl::platforms. */ DPPL_API -size_t DPPLPlatform_GetNumPlatforms (); +size_t DPPLPlatform_GetNumNonHostPlatforms (); /*! - * @brief Returns the number of unique sycl backends on the system not counting - * the host backend. + * @brief Returns the number of unique non-host sycl backends on the system. * * @return The number of unique sycl backends. */ DPPL_API -size_t DPPLPlatform_GetNumBackends (); +size_t DPPLPlatform_GetNumNonHostBackends (); /*! - * @brief Returns an array of the unique DPPLSyclBEType values on the system. + * @brief Returns an array of the unique non-host DPPLSyclBackendType values on + * the system. * - * @return An array of DPPLSyclBEType enum values. + * @return An array of DPPLSyclBackendType enum values. */ DPPL_API -__dppl_give enum DPPLSyclBEType* DPPLPlatform_GetListOfBackends (); +__dppl_give DPPLSyclBackendType* DPPLPlatform_GetListOfNonHostBackends (); /*! - * @brief Frees an array of DPPLSyclBEType enum values. + * @brief Frees an array of DPPLSyclBackendType enum values. * - * @param BEArr An array of DPPLSyclBEType enum values to be freed. + * @param BEArr An array of DPPLSyclBackendType enum values to be freed. */ DPPL_API -void DPPLPlatform_DeleteListOfBackends (__dppl_take enum DPPLSyclBEType* BEArr); +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType* BEArr); /*! * @brief Prints out some selected info about all sycl::platform on the system. diff --git a/backends/include/dppl_sycl_program_interface.h b/backends/include/dppl_sycl_program_interface.h index 952156ff79..28056f49bc 100644 --- a/backends/include/dppl_sycl_program_interface.h +++ b/backends/include/dppl_sycl_program_interface.h @@ -1,6 +1,6 @@ -//===---- dppl_sycl_program_interface.h - DPPL-SYCL interface --*--C++ --*--===// +//===----------- dppl_sycl_program_interface.h - dpctl-C_API --*--C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_queue_interface.h b/backends/include/dppl_sycl_queue_interface.h index 2fc9523bc9..5ba2011907 100644 --- a/backends/include/dppl_sycl_queue_interface.h +++ b/backends/include/dppl_sycl_queue_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===----------- dppl_sycl_queue_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -59,10 +59,11 @@ bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1, * @brief Returns the Sycl backend for the provided sycl::queue. * * @param QRef An opaque pointer to the sycl queue. - * @return A enum DPPLSyclBEType corresponding to the backed for the queue. + * @return A enum DPPLSyclBackendType corresponding to the backed for the + * queue. */ DPPL_API -enum DPPLSyclBEType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef); +DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef); /*! * @brief Returns the Sycl context for the queue. 
diff --git a/backends/include/dppl_sycl_queue_manager.h b/backends/include/dppl_sycl_queue_manager.h index 3afa2d3f5a..73822146d4 100644 --- a/backends/include/dppl_sycl_queue_manager.h +++ b/backends/include/dppl_sycl_queue_manager.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_manager.h - DPPL-SYCL interface ---*---C++ ---*---===// +//===----------- dppl_sycl_queue_manager.h - dpctl-C_API ---*---C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -65,8 +65,8 @@ __dppl_give DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue (); */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! @@ -78,7 +78,6 @@ DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, DPPL_API size_t DPPLQueueMgr_GetNumActivatedQueues (); - /*! * @brief Get the number of available queues for given backend and device type * combination. @@ -88,8 +87,8 @@ size_t DPPLQueueMgr_GetNumActivatedQueues (); * @return The number of available queues. */ DPPL_API -size_t DPPLQueueMgr_GetNumQueues (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy); +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy); /*! * @brief Returns True if the passed in queue and the current queue are the @@ -115,8 +114,8 @@ bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! @@ -141,8 +140,8 @@ DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, */ DPPL_API __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); /*! 
diff --git a/backends/include/dppl_sycl_types.h b/backends/include/dppl_sycl_types.h index ce1f74dd85..74f2792f7a 100644 --- a/backends/include/dppl_sycl_types.h +++ b/backends/include/dppl_sycl_types.h @@ -1,6 +1,6 @@ -//===---------- dppl_sycl_types.h - DPPL-SYCL interface ---*--- C++ ---*---===// +//===-------------- dppl_sycl_types.h - dpctl-C_API ----*---- C++ ----*----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/dppl_sycl_usm_interface.h b/backends/include/dppl_sycl_usm_interface.h index 6a52fcff42..6b771d7c2d 100644 --- a/backends/include/dppl_sycl_usm_interface.h +++ b/backends/include/dppl_sycl_usm_interface.h @@ -1,6 +1,6 @@ -//===--- dppl_sycl_usm_interface.h - DPPL-SYCL interface ---*---C++ -*---===// +//===------------- dppl_sycl_usm_interface.h - dpctl-C_API ---*---C++ -*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -84,6 +84,6 @@ void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef, DPPL_API const char * DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef, - __dppl_keep const DPPLSyclContextRef СRef); + __dppl_keep const DPPLSyclContextRef CRef); DPPL_C_EXTERN_C_END diff --git a/backends/include/dppl_utils.h b/backends/include/dppl_utils.h index be523622ba..b0578173af 100644 --- a/backends/include/dppl_utils.h +++ b/backends/include/dppl_utils.h @@ -1,6 +1,6 @@ -//===------------- dppl_utils.h - DPPL-SYCL interface --*-- C++ -----*-----===// +//===------------------- dppl_utils.h - dpctl-C_API ---*--- C++ -----*-----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/include/error_check_macros.h b/backends/include/error_check_macros.h deleted file mode 100644 index 77bd462e5a..0000000000 --- a/backends/include/error_check_macros.h +++ /dev/null @@ -1,111 +0,0 @@ -//===----- error_check_macros.h - DPPL-OpenCL interface -------*- C -*-----===// -// -// Python Data Parallel Processing Python (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains a set of macros to check for different OpenCL error -/// codes. -/// -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -// TODO : Add branches to check for OpenCL error codes and print relevant error -// messages. Then there would be no need to pass in the message string - -// FIXME : The error check macro needs to be improved. Currently, we encounter -// an error and goto the error label. Directly going to the error label can lead -// to us not releasing resources prior to returning from the function. 
To work -// around this situation, add a stack to store all the objects that should be -// released prior to returning. The stack gets populated as a function executes -// and on encountering an error, all objects on the stack get properly released -// prior to returning. (Look at enqueue_dp_kernel_from_source for a -// ghastly example where we really need proper resource management.) - -// FIXME : memory allocated in a function should be released in the error -// section - -#define CHECK_OPEN_CL_ERROR(x, M) do { \ - int retval = (x); \ - switch(retval) { \ - case 0: \ - break; \ - case -36: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_COMMAND_QUEUE]command_queue is not a " \ - "valid command-queue.", \ - __LINE__, __FILE__); \ - goto error; \ - case -38: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n" \ - "%s\n", \ - retval, "[CL_INVALID_MEM_OBJECT] memory object is not a " \ - "valid OpenCL memory object.", \ - __LINE__, __FILE__,M); \ - goto error; \ - case -45: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_PROGRAM_EXECUTABLE] no successfully " \ - "built program executable available for device " \ - "associated with command_queue.", \ - __LINE__, __FILE__); \ - goto error; \ - case -54: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, "[CL_INVALID_WORK_GROUP_SIZE]", \ - __LINE__, __FILE__); \ - goto error; \ - default: \ - fprintf(stderr, "Open CL Runtime Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - } \ -} while(0) - - -#define CHECK_MALLOC_ERROR(type, x) do { \ - type * ptr = (type*)(x); \ - if(ptr == NULL) { \ - fprintf(stderr, "Malloc Error for type %s on Line %d in %s", \ - #type, __LINE__, __FILE__); \ - perror(" "); \ - free(ptr); \ - ptr = NULL; \ - goto malloc_error; \ - } \ -} while(0) - - -#define CHECK_DPGLUE_ERROR(x, M) do { \ - int retval = (x); \ - switch(retval) { \ - case 0: \ - break; \ - case -1: \ - fprintf(stderr, "DP_Glue Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - default: \ - fprintf(stderr, "DP_Glue Error: %d (%s) on Line %d in %s\n", \ - retval, M, __LINE__, __FILE__); \ - goto error; \ - } \ -} while(0) diff --git a/backends/source/dppl_opencl_interface.c b/backends/source/dppl_opencl_interface.c deleted file mode 100644 index 64b0de33cf..0000000000 --- a/backends/source/dppl_opencl_interface.c +++ /dev/null @@ -1,1166 +0,0 @@ -//===------ dppl_opencl_interface.c - DPPL-OpenCL interface ----*- C -*----===// -// -// Python Data Parallel Processing Library (PyDPPL) -// -// Copyright 2020 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the data types and functions declared in -/// dppl_opencl_interface.h. 
-/// -//===----------------------------------------------------------------------===// -#include "dppl_opencl_interface.h" -#include "error_check_macros.h" -#include -#include -#include /* OpenCL headers */ - -/*------------------------------- Magic numbers ------------------------------*/ - -#define RUNTIME_ID 0x6dd5e8c8 -#define ENV_ID 0x6c78fd87 -#define BUFFER_ID 0xc55c47b1 -#define KERNEL_ID 0x032dc08e -#define PROGRAM_ID 0xc3842d12 -#define KERNELARG_ID 0xd42f630f - -#if DEBUG - -static void check_runtime_id (runtime_t x) -{ - assert(x->id_ == RUNTIME_ID); -} - -static void check_env_id (env_t x) -{ - assert(x->id_ == ENV_ID); -} - -static void check_buffer_id (buffer_t x) -{ - assert(x->id_ == BUFFER_ID); -} - -static void check_kernel_id (kernel_t x) -{ - assert(x->id_ == KERNEL_ID); -} - -static void check_program_id (program_t x) -{ - assert(x->id_ == PROGRAM_ID); -} - -static void check_kernelarg_id (kernel_arg_t x) -{ - assert(x->id_ == KERNELARG_ID); -} - -#endif - -/*------------------------------- Private helpers ----------------------------*/ - - -static int get_platform_name (cl_platform_id platform, char **platform_name) -{ - cl_int err; - size_t n; - - err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, *platform_name, &n); - CHECK_OPEN_CL_ERROR(err, "Could not get platform name length."); - - // Allocate memory for the platform name string - *platform_name = (char*)malloc(sizeof(char)*n); - CHECK_MALLOC_ERROR(char*, *platform_name); - - err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, n, *platform_name, - NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get platform name."); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(*platform_name); - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -static int dump_device_info (void *obj) -{ - cl_int err; - char *value; - size_t size; - cl_uint maxComputeUnits; - env_t env_t_ptr; - - value = NULL; - env_t_ptr = (env_t)obj; - cl_device_id device = (cl_device_id)(env_t_ptr->device); - - err = clRetainDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not retain device."); - - // print device name - err = clGetDeviceInfo(device, CL_DEVICE_NAME, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get device name."); - value = (char*)malloc(size); - err = clGetDeviceInfo(device, CL_DEVICE_NAME, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get device name."); - printf("Device: %s\n", value); - free(value); - - // print hardware device version - err = clGetDeviceInfo(device, CL_DEVICE_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get device version."); - value = (char*) malloc(size); - err = clGetDeviceInfo(device, CL_DEVICE_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get device version."); - printf("Hardware version: %s\n", value); - free(value); - - // print software driver version - clGetDeviceInfo(device, CL_DRIVER_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get driver version."); - value = (char*) malloc(size); - clGetDeviceInfo(device, CL_DRIVER_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get driver version."); - printf("Software version: %s\n", value); - free(value); - - // print c version supported by compiler for device - clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get open cl version."); - value = (char*) malloc(size); - clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get 
open cl version."); - printf("OpenCL C version: %s\n", value); - free(value); - - // print parallel compute units - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, - sizeof(maxComputeUnits), &maxComputeUnits, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get number of compute units."); - printf("Parallel compute units: %d\n", maxComputeUnits); - - err = clReleaseDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not release device."); - - return DP_GLUE_SUCCESS; - -error: - free(value); - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Helper function to print out information about the platform and - * devices available to this runtime. - * - */ -static int dump_dp_runtime_info (void *obj) -{ - size_t i; - runtime_t rt; - - rt = (runtime_t)obj; -#if DEBUG - check_runtime_id(rt); -#endif - if(rt) { - printf("Number of platforms : %d\n", rt->num_platforms); - cl_platform_id *platforms = rt->platform_ids; - for(i = 0; i < rt->num_platforms; ++i) { - char *platform_name = NULL; - get_platform_name(platforms[i], &platform_name); - printf("Platform #%zu: %s\n", i, platform_name); - free(platform_name); - } - } - - return DP_GLUE_SUCCESS; -} - - -/*! - * - */ -static int dump_dp_kernel_info (void *obj) -{ - cl_int err; - char *value; - size_t size; - cl_uint numKernelArgs; - cl_kernel kernel; - kernel_t kernel_t_ptr; - - value = NULL; - kernel_t_ptr = (kernel_t)obj; -#if DEBUG - check_kernel_id(kernel_t_ptr); -#endif - kernel = (cl_kernel)(kernel_t_ptr->kernel); - - // print kernel function name - err = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &size); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel function name size."); - value = (char*)malloc(size); - err = clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, size, value, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel function name."); - printf("Kernel Function name: %s\n", value); - free(value); - - // print the number of kernel args - err = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numKernelArgs), - &numKernelArgs, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get kernel num args."); - printf("Number of kernel arguments : %d\n", numKernelArgs); - - return DP_GLUE_SUCCESS; - -error: - free(value); - return DP_GLUE_FAILURE; -} - - -/*! 
- * - */ -static int get_first_device (cl_platform_id* platforms, - cl_uint platformCount, - cl_device_id *device, - cl_device_type device_ty) -{ - cl_int status; - cl_uint ndevices = 0; - unsigned int i; - - for (i = 0; i < platformCount; ++i) { - // get all devices of env_ty - status = clGetDeviceIDs(platforms[i], device_ty, 0, NULL, &ndevices); - // If this platform has no devices of this type then continue - if(!ndevices) continue; - - // get the first device - status = clGetDeviceIDs(platforms[i], device_ty, 1, device, NULL); - CHECK_OPEN_CL_ERROR(status, "Could not get first cl_device_id."); - - // If the first device of this type was discovered, no need to look more - if(ndevices) break; - } - - if(ndevices) - return DP_GLUE_SUCCESS; - else - return DP_GLUE_FAILURE; - -error: - return DP_GLUE_FAILURE; -} - -static int support_int64_atomics(cl_device_id *device) -{ - - cl_int err; - size_t size; - char *value; - - err = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 0, NULL, &size); - if (err != CL_SUCCESS ) { - printf("Unable to obtain device info for param\n"); - return DP_GLUE_FAILURE; - } - value = (char*) malloc(size); - clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, size, value, NULL); - - if(strstr(value, "cl_khr_int64_base_atomics") != NULL) { - return DP_GLUE_SUCCESS; - } else { - return DP_GLUE_FAILURE; - } -} - -static int support_float64_atomics(cl_device_id *device) -{ - - cl_int err; - size_t size; - char *value; - - err = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 0, NULL, &size); - if (err != CL_SUCCESS ) { - printf("Unable to obtain device info for param\n"); - return DP_GLUE_FAILURE; - } - value = (char*) malloc(size); - clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, size, value, NULL); - - if(strstr(value, "cl_khr_fp64") != NULL) { - return DP_GLUE_SUCCESS; - } else { - return DP_GLUE_FAILURE; - } -} - -/*! 
- * - */ -static int create_dp_env_t (cl_platform_id* platforms, - size_t nplatforms, - cl_device_type device_ty, - env_t *env_t_ptr) -{ - cl_int err; - int err1; - env_t env; - cl_device_id *device; - - env = NULL; - device = NULL; - - // Allocate the env_t object - env = (env_t)malloc(sizeof(struct dp_env)); - CHECK_MALLOC_ERROR(env_t, env); - env->id_ = ENV_ID; - - env->context = NULL; - env->device = NULL; - env->queue = NULL; - env->max_work_item_dims = 0; - env->max_work_group_size = 0; - env->dump_fn = NULL; - - device = (cl_device_id*)malloc(sizeof(cl_device_id)); - - err1 = get_first_device(platforms, nplatforms, device, device_ty); - CHECK_DPGLUE_ERROR(err1, "Failed inside get_first_device"); - - // get the CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS for this device - err = clGetDeviceInfo(*device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, - sizeof(env->max_work_item_dims), &env->max_work_item_dims, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get max work item dims"); - - // get the CL_DEVICE_MAX_WORK_GROUP_SIZE for this device - err = clGetDeviceInfo(*device, CL_DEVICE_MAX_WORK_GROUP_SIZE, - sizeof(env->max_work_group_size), &env->max_work_group_size, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not get max work group size"); - - // Create a context and associate it with device - env->context = clCreateContext(NULL, 1, device, NULL, NULL, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create device context."); - // Create a queue and associate it with the context - env->queue = clCreateCommandQueueWithProperties((cl_context)env->context, - *device, 0, &err); - - CHECK_OPEN_CL_ERROR(err, "Could not create command queue."); - - env->device = *device; - env ->dump_fn = dump_device_info; - - if (DP_GLUE_SUCCESS == support_int64_atomics(device)) { - env->support_int64_atomics = 1; - } else { - env->support_int64_atomics = 0; - } - - if (DP_GLUE_SUCCESS == support_float64_atomics(device)) { - env->support_float64_atomics = 1; - } else { - env->support_float64_atomics = 0; - } - - free(device); - *env_t_ptr = env; - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(env); - *env_t_ptr = NULL; - return DP_GLUE_FAILURE; -} - - -static int destroy_dp_env_t (env_t *env_t_ptr) -{ - cl_int err; -#if DEBUG - check_env_id(*env_t_ptr); -#endif - err = clReleaseCommandQueue((cl_command_queue)(*env_t_ptr)->queue); - CHECK_OPEN_CL_ERROR(err, "Could not release command queue."); - err = clReleaseDevice((cl_device_id)(*env_t_ptr)->device); - CHECK_OPEN_CL_ERROR(err, "Could not release device."); - err = clReleaseContext((cl_context)(*env_t_ptr)->context); - CHECK_OPEN_CL_ERROR(err, "Could not release context."); - - free(*env_t_ptr); - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Initialize the runtime object. 
- */ -static int init_runtime_t_obj (runtime_t rt) -{ - cl_int status; - int ret; - cl_platform_id *platforms; -#if DEBUG - check_runtime_id(rt); -#endif - // get count of available platforms - status = clGetPlatformIDs(0, NULL, &(rt->num_platforms)); - CHECK_OPEN_CL_ERROR(status, "Could not get platform count."); - - if(!rt->num_platforms) { - fprintf(stderr, "No OpenCL platforms found.\n"); - goto error; - } - - // Allocate memory for the platforms array - rt->platform_ids = (cl_platform_id*)malloc( - sizeof(cl_platform_id)*rt->num_platforms - ); - CHECK_MALLOC_ERROR(cl_platform_id, rt->platform_ids); - - // Get the platforms - status = clGetPlatformIDs(rt->num_platforms, rt->platform_ids, NULL); - CHECK_OPEN_CL_ERROR(status, "Could not get platform ids"); - // Cast rt->platforms to a pointer of type cl_platform_id, as we cannot do - // pointer arithmetic on void*. - platforms = (cl_platform_id*)rt->platform_ids; - // Get the first cpu device on this platform - ret = create_dp_env_t(platforms, rt->num_platforms, - CL_DEVICE_TYPE_CPU, &rt->first_cpu_env); - rt->has_cpu = !ret; - -#if DEBUG - if(rt->has_cpu) - printf("DEBUG: CPU device acquired...\n"); - else - printf("DEBUG: No CPU available on the system\n"); -#endif - - // Get the first gpu device on this platform - ret = create_dp_env_t(platforms, rt->num_platforms, - CL_DEVICE_TYPE_GPU, &rt->first_gpu_env); - rt->has_gpu = !ret; - -#if DEBUG - if(rt->has_gpu) - printf("DEBUG: GPU device acquired...\n"); - else - printf("DEBUG: No GPU available on the system.\n"); -#endif - - if(rt->has_gpu) - rt->curr_env = rt->first_gpu_env; - else if(rt->has_cpu) - rt->curr_env = rt->first_cpu_env; - else - goto error; - - return DP_GLUE_SUCCESS; - -malloc_error: - - return DP_GLUE_FAILURE; -error: - free(rt->platform_ids); - - return DP_GLUE_FAILURE; -} - -/*-------------------------- End of private helpers --------------------------*/ - -int set_curr_env (runtime_t rt, env_t env) -{ - if(env && rt) { - rt->curr_env = env; - return DP_GLUE_SUCCESS; - } - return DP_GLUE_FAILURE; -} - -/*! - * @brief Initializes a new dp_runtime_t object - * - */ -int create_dp_runtime (runtime_t *rt) -{ - int err; - runtime_t rtobj; - - rtobj = NULL; - // Allocate a new struct dp_runtime object - rtobj = (runtime_t)malloc(sizeof(struct dp_runtime)); - CHECK_MALLOC_ERROR(runtime_t, rt); - - rtobj->id_ = RUNTIME_ID; - rtobj->num_platforms = 0; - rtobj->platform_ids = NULL; - err = init_runtime_t_obj(rtobj); - CHECK_DPGLUE_ERROR(err, "Could not initialize runtime object."); - rtobj->dump_fn = dump_dp_runtime_info; - - *rt = rtobj; -#if DEBUG - printf("DEBUG: Created an new dp_runtime object\n"); -#endif - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(rtobj); - return DP_GLUE_FAILURE; -} - - -/*! - * @brief Free the runtime and all its resources. 
- * - */ -int destroy_dp_runtime (runtime_t *rt) -{ - int err; -#if DEBUG - check_runtime_id(*rt); -#endif - -#if DEBUG - printf("DEBUG: Going to destroy the dp_runtime object\n"); -#endif - // free the first_cpu_device - if((*rt)->first_cpu_env) { - err = destroy_dp_env_t(&(*rt)->first_cpu_env); - CHECK_DPGLUE_ERROR(err, "Could not destroy first_cpu_device."); - } - - // free the first_gpu_device - if((*rt)->first_gpu_env) { - err = destroy_dp_env_t(&(*rt)->first_gpu_env); - CHECK_DPGLUE_ERROR(err, "Could not destroy first_gpu_device."); - } - - // free the platforms - free((cl_platform_id*)(*rt)->platform_ids); - // free the runtime_t object - free(*rt); - -#if DEBUG - printf("DEBUG: Destroyed the new dp_runtime object\n"); -#endif - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int retain_dp_context (env_t env_t_ptr) -{ - cl_int err; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed when calling clRetainContext."); - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int release_dp_context (env_t env_t_ptr) -{ - cl_int err; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - context = (cl_context)(env_t_ptr->context); - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed when calling clRetainContext."); - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int create_dp_rw_mem_buffer (env_t env_t_ptr, - size_t buffsize, - buffer_t *buffer_t_ptr) -{ - cl_int err; - buffer_t buff; - cl_context context; -#if DEBUG - check_env_id(env_t_ptr); -#endif - buff = NULL; - - // Get the context from the device - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed to retain context."); - - // Allocate a dp_buffer object - buff = (buffer_t)malloc(sizeof(struct dp_buffer)); - CHECK_MALLOC_ERROR(buffer_t, buffer_t_ptr); - - buff->id_ = BUFFER_ID; - - // Create the OpenCL buffer. - // NOTE : Copying of data from host to device needs to happen explicitly - // using clEnqueue[Write|Read]Buffer. This would change in the future. 
- buff->buffer_ptr = clCreateBuffer(context, CL_MEM_READ_WRITE, buffsize, - NULL, &err); - CHECK_OPEN_CL_ERROR(err, "Failed to create CL buffer."); - - buff->sizeof_buffer_ptr = sizeof(cl_mem); -#if DEBUG - printf("DEBUG: CL RW buffer created...\n"); -#endif - *buffer_t_ptr = buff; - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Failed to release context."); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(buff); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_rw_mem_buffer (buffer_t *buff) -{ - cl_int err; -#if DEBUG - check_buffer_id(*buff); -#endif - err = clReleaseMemObject((cl_mem)(*buff)->buffer_ptr); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL buffer."); - free(*buff); - -#if DEBUG - printf("DEBUG: CL buffer destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -int write_dp_mem_buffer_to_device (env_t env_t_ptr, - buffer_t buffer_t_ptr, - bool blocking, - size_t offset, - size_t buffersize, - const void *data_ptr) -{ - cl_int err; - cl_command_queue queue; - cl_mem mem; -#if DEBUG - check_env_id(env_t_ptr); - check_buffer_id(buffer_t_ptr); -#endif - queue = (cl_command_queue)env_t_ptr->queue; - mem = (cl_mem)buffer_t_ptr->buffer_ptr; - -#if DEBUG - assert(mem && "buffer memory is NULL"); -#endif - - err = clRetainMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - err = clRetainCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the buffer memory object."); - - // Not using any events for the time being. Eventually we want to figure - // out the event dependencies using parfor analysis. - err = clEnqueueWriteBuffer(queue, mem, blocking?CL_TRUE:CL_FALSE, - offset, buffersize, data_ptr, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Failed to write to CL buffer."); - - err = clReleaseCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to release the command queue."); - err = clReleaseMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to release the buffer memory object."); - -#if DEBUG - printf("DEBUG: CL buffer written to device...\n"); -#endif - //--- TODO: Implement a version that uses clEnqueueMapBuffer - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int read_dp_mem_buffer_from_device (env_t env_t_ptr, - buffer_t buffer_t_ptr, - bool blocking, - size_t offset, - size_t buffersize, - void *data_ptr) -{ - cl_int err; - cl_command_queue queue; - cl_mem mem; -#if DEBUG - check_env_id(env_t_ptr); - check_buffer_id(buffer_t_ptr); -#endif - queue = (cl_command_queue)env_t_ptr->queue; - mem = (cl_mem)buffer_t_ptr->buffer_ptr; - - err = clRetainMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - err = clRetainCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to retain the command queue."); - - // Not using any events for the time being. Eventually we want to figure - // out the event dependencies using parfor analysis. 
- err = clEnqueueReadBuffer(queue, mem, blocking?CL_TRUE:CL_FALSE, - offset, buffersize, data_ptr, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Failed to read from CL buffer."); - - err = clReleaseCommandQueue(queue); - CHECK_OPEN_CL_ERROR(err, "Failed to release the command queue."); - err = clReleaseMemObject(mem); - CHECK_OPEN_CL_ERROR(err, "Failed to release the buffer memory object."); - -#if DEBUG - printf("DEBUG: CL buffer read from device...\n"); -#endif - fflush(stdout); - //--- TODO: Implement a version that uses clEnqueueMapBuffer - - return DP_GLUE_SUCCESS; -error: - return DP_GLUE_FAILURE; -} - - -int create_dp_program_from_spirv (env_t env_t_ptr, - const void *il, - size_t length, - program_t *program_t_ptr) -{ - cl_int err; - cl_context context; - program_t prog; -#if DUMP_SPIRV - FILE *write_file; -#endif -#if DEBUG - check_env_id(env_t_ptr); -#endif - prog = NULL; - -#if DUMP_SPIRV - write_file = fopen("latest.spirv","wb"); - fwrite(il,length,1,write_file); - fclose(write_file); -#endif - - prog = (program_t)malloc(sizeof(struct dp_program)); - CHECK_MALLOC_ERROR(program_t, program_t_ptr); - - prog->id_ = PROGRAM_ID; - - context = (cl_context)env_t_ptr->context; - - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - // Create a program with a SPIR-V file - prog->program = clCreateProgramWithIL(context, il, length, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create program with IL"); -#if DEBUG - printf("DEBUG: CL program created from spirv of length %zu...\n", length); -#endif - - *program_t_ptr = prog; - - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(prog); - return DP_GLUE_FAILURE; -} - - -int create_dp_program_from_source (env_t env_t_ptr, - unsigned int count, - const char **strings, - const size_t *lengths, - program_t *program_t_ptr) -{ - cl_int err; - cl_context context; - program_t prog; -#if DEBUG - check_env_id(env_t_ptr); -#endif - prog = NULL; - prog = (program_t)malloc(sizeof(struct dp_program)); - CHECK_MALLOC_ERROR(program_t, program_t_ptr); - - prog->id_ = PROGRAM_ID; - - context = (cl_context)env_t_ptr->context; - - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - // Create a program with string source files - prog->program = clCreateProgramWithSource(context, count, strings, - lengths, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create program with source"); -#if DEBUG - printf("DEBUG: CL program created from source...\n"); -#endif - - *program_t_ptr = prog; - - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(prog); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_program (program_t *program_ptr) -{ - cl_int err; -#if DEBUG - check_program_id(*program_ptr); -#endif - err = clReleaseProgram((cl_program)(*program_ptr)->program); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL program."); - free(*program_ptr); - -#if DEBUG - printf("DEBUG: CL program destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -int build_dp_program (env_t env_t_ptr, program_t program_t_ptr) -{ - cl_int err; - cl_device_id device; -#if DEBUG - check_env_id(env_t_ptr); - check_program_id(program_t_ptr); -#endif - device = (cl_device_id)env_t_ptr->device; - err = clRetainDevice(device); - 
CHECK_OPEN_CL_ERROR(err, "Could not retain device"); - // Build (compile) the program for the device - err = clBuildProgram((cl_program)program_t_ptr->program, 1, &device, NULL, - NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not build program"); -#if DEBUG - printf("DEBUG: CL program successfully built.\n"); -#endif - err = clReleaseDevice(device); - CHECK_OPEN_CL_ERROR(err, "Could not release device"); - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int create_dp_kernel (env_t env_t_ptr, - program_t program_t_ptr, - const char *kernel_name, - kernel_t *kernel_ptr) -{ - cl_int err; - cl_context context; - kernel_t ker; -#if DEBUG - check_env_id(env_t_ptr); -#endif - ker = NULL; - ker = (kernel_t)malloc(sizeof(struct dp_kernel)); - CHECK_MALLOC_ERROR(kernel_t, kernel_ptr); - - ker->id_ = KERNEL_ID; - - context = (cl_context)(env_t_ptr->context); - err = clRetainContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not retain context"); - ker->kernel = clCreateKernel((cl_program)(program_t_ptr->program), - kernel_name, &err); - CHECK_OPEN_CL_ERROR(err, "Could not create kernel"); - err = clReleaseContext(context); - CHECK_OPEN_CL_ERROR(err, "Could not release context"); -#if DEBUG - printf("DEBUG: CL kernel created\n"); -#endif - ker->dump_fn = dump_dp_kernel_info; - *kernel_ptr = ker; - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -error: - free(ker); - return DP_GLUE_FAILURE; -} - - -int destroy_dp_kernel (kernel_t *kernel_ptr) -{ - cl_int err; -#if DEBUG - check_kernel_id(*kernel_ptr); -#endif - err = clReleaseKernel((cl_kernel)(*kernel_ptr)->kernel); - CHECK_OPEN_CL_ERROR(err, "Failed to release CL kernel."); - free(*kernel_ptr); - -#if DEBUG - printf("DEBUG: CL kernel destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int create_dp_kernel_arg (const void *arg_value, - size_t arg_size, - kernel_arg_t *kernel_arg_t_ptr) -{ - kernel_arg_t kernel_arg; - - kernel_arg = NULL; - kernel_arg = (kernel_arg_t)malloc(sizeof(struct dp_kernel_arg)); - CHECK_MALLOC_ERROR(kernel_arg_t, kernel_arg); - - kernel_arg->id_ = KERNELARG_ID; - kernel_arg->arg_size = arg_size; - kernel_arg->arg_value = arg_value; - -#if DEBUG - printf("DEBUG: Kernel arg created\n"); -// void **tp = (void**)kernel_arg->arg_value; -// printf("DEBUG: create_kernel_arg %p (size %ld, addr %p)\n", -// kernel_arg, kernel_arg->arg_size, *tp); -#endif - - *kernel_arg_t_ptr = kernel_arg; - - return DP_GLUE_SUCCESS; - -malloc_error: - return DP_GLUE_FAILURE; -} - -/*! - * - */ -int create_dp_kernel_arg_from_buffer (buffer_t *buffer_t_ptr, - kernel_arg_t *kernel_arg_t_ptr) -{ -#if DEBUG - check_buffer_id(*buffer_t_ptr); -#endif - return create_dp_kernel_arg(&((*buffer_t_ptr)->buffer_ptr), - (*buffer_t_ptr)->sizeof_buffer_ptr, - kernel_arg_t_ptr); -} - -/*! - * - */ -int destroy_dp_kernel_arg (kernel_arg_t *kernel_arg_t_ptr) -{ - free(*kernel_arg_t_ptr); - -#if DEBUG - printf("DEBUG: Kernel arg destroyed...\n"); -#endif - - return DP_GLUE_SUCCESS; -} - - -/*! 
- * - */ -int set_args_and_enqueue_dp_kernel (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *array_of_args, - unsigned int work_dim, - const size_t *global_work_offset, - const size_t *global_work_size, - const size_t *local_work_size) -{ - size_t i; - cl_int err; - cl_kernel kernel; - cl_command_queue queue; - - err = 0; -#if DEBUG - check_env_id(env_t_ptr); - check_kernel_id(kernel_t_ptr); -#endif - kernel = (cl_kernel)kernel_t_ptr->kernel; - queue = (cl_command_queue)env_t_ptr->queue; -#if DEBUG - kernel_t_ptr->dump_fn(kernel_t_ptr); -#endif - // Set the kernel arguments - for(i = 0; i < nargs; ++i) { -#if DEBUG - printf("DEBUG: clSetKernelArgs for arg # %zu\n", i); -#endif - kernel_arg_t this_arg = array_of_args[i]; -#if DEBUG - check_kernelarg_id(this_arg); - void **tp = (void**)this_arg->arg_value; - printf("DEBUG: clSetKernelArgs for arg # %zu (size %zu, addr %p)\n", i, - this_arg->arg_size, *tp); -#endif - err = clSetKernelArg(kernel, i, this_arg->arg_size, - this_arg->arg_value); - CHECK_OPEN_CL_ERROR(err, "Could not set arguments to the kernel"); - } - - // Execute the kernel. Not using events for the time being. - err = clEnqueueNDRangeKernel(queue, kernel, work_dim, global_work_offset, - global_work_size, local_work_size, 0, NULL, NULL); - CHECK_OPEN_CL_ERROR(err, "Could not enqueue the kernel"); - - err = clFinish(queue); - CHECK_OPEN_CL_ERROR(err, "Failed while waiting for queue to finish"); -#if DEBUG - printf("DEBUG: CL Kernel Finish...\n"); -#endif - return DP_GLUE_SUCCESS; - -error: - return DP_GLUE_FAILURE; -} - - -/*! - * - */ -int set_args_and_enqueue_dp_kernel_auto_blocking (env_t env_t_ptr, - kernel_t kernel_t_ptr, - size_t nargs, - const kernel_arg_t *args, - unsigned int num_dims, - size_t *dim_starts, - size_t *dim_stops) -{ - size_t *global_work_size; -// size_t *local_work_size; - int err; - unsigned i; - - global_work_size = (size_t*)malloc(sizeof(size_t) * num_dims); -// local_work_size = (size_t*)malloc(sizeof(size_t) * num_dims); - CHECK_MALLOC_ERROR(size_t, global_work_size); -// CHECK_MALLOC_ERROR(size_t, local_work_size); - - assert(num_dims > 0 && num_dims < 4); - for (i = 0; i < num_dims; ++i) { - global_work_size[i] = dim_stops[i] - dim_starts[i] + 1; - } - - err = set_args_and_enqueue_dp_kernel(env_t_ptr, - kernel_t_ptr, - nargs, - args, - num_dims, - NULL, - global_work_size, - NULL); - free(global_work_size); -// free(local_work_size); - return err; - -malloc_error: - free(global_work_size); -// free(local_work_size); - return DP_GLUE_FAILURE; -} diff --git a/backends/source/dppl_sycl_context_interface.cpp b/backends/source/dppl_sycl_context_interface.cpp index a004d8d1d4..78c71f5ad0 100644 --- a/backends/source/dppl_sycl_context_interface.cpp +++ b/backends/source/dppl_sycl_context_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_context_interface.cpp - DPPL-SYCL interface --*- C++ -*-===// +//===------- dppl_sycl_context_interface.cpp - dpctl-C_API ---*--- C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -55,7 +55,7 @@ void DPPLContext_Delete (__dppl_take DPPLSyclContextRef CtxRef) delete unwrap(CtxRef); } -DPPLSyclBEType +DPPLSyclBackendType DPPLContext_GetBackend (__dppl_keep const DPPLSyclContextRef CtxRef) { auto BE = unwrap(CtxRef)->get_platform().get_backend(); diff --git a/backends/source/dppl_sycl_device_interface.cpp b/backends/source/dppl_sycl_device_interface.cpp index 0c3bb77af2..0dbf2affe1 100644 
--- a/backends/source/dppl_sycl_device_interface.cpp
+++ b/backends/source/dppl_sycl_device_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_device_interface.cpp - DPPL-SYCL interface --*- C++ -*--===//
+//===------ dppl_sycl_device_interface.cpp - dpctl-C_API ---*--- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -60,22 +60,22 @@ void dump_device_info (const device & Device)
     switch(devTy)
     {
     case info::device_type::cpu:
-        ss << "cpu" << '\n';
-        break;
+        ss << "cpu" << '\n';
+        break;
     case info::device_type::gpu:
-        ss << "gpu" << '\n';
-        break;
+        ss << "gpu" << '\n';
+        break;
     case info::device_type::accelerator:
-        ss << "accelerator" << '\n';
-        break;
+        ss << "accelerator" << '\n';
+        break;
     case info::device_type::custom:
-        ss << "custom" << '\n';
-        break;
+        ss << "custom" << '\n';
+        break;
     case info::device_type::host:
-        ss << "host" << '\n';
-        break;
+        ss << "host" << '\n';
+        break;
     default:
-        ss << "unknown" << '\n';
+        ss << "unknown" << '\n';
     }

     std::cout << ss.str();
@@ -104,8 +104,8 @@ void DPPLDevice_Delete (__dppl_take DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsAccelerator (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_accelerator();
+    if (D) {
+        return D->is_accelerator();
     }
     return false;
 }
@@ -113,18 +113,17 @@ bool DPPLDevice_IsAccelerator (__dppl_keep const DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsCPU (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_cpu();
+    if (D) {
+        return D->is_cpu();
     }
     return false;
-
 }

 bool DPPLDevice_IsGPU (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_gpu();
+    if (D) {
+        return D->is_gpu();
     }
     return false;
 }
@@ -133,29 +132,28 @@ bool DPPLDevice_IsGPU (__dppl_keep const DPPLSyclDeviceRef DRef)

 bool DPPLDevice_IsHost (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        return unwrap(DRef)->is_host();
+    if (D) {
+        return D->is_host();
     }
     return false;
 }

 uint32_t
-DPPLDevice_GetMaxComputeUnites (__dppl_keep const DPPLSyclDeviceRef DRef)
+DPPLDevice_GetMaxComputeUnits (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_compute_units>();
     }
     return 0;
-
 }

 uint32_t
 DPPLDevice_GetMaxWorkItemDims (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_work_item_dimensions>();
     }
     return 0;
@@ -166,7 +164,7 @@ DPPLDevice_GetMaxWorkItemSizes (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     size_t *sizes = nullptr;
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         auto id_sizes = D->get_info<info::device::max_work_item_sizes>();
         sizes = new size_t[3];
         for(auto i = 0ul; i < 3; ++i) {
@@ -180,7 +178,7 @@ size_t
 DPPLDevice_GetMaxWorkGroupSize (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_work_group_size>();
     }
     return 0;
@@ -190,18 +188,38 @@ uint32_t
 DPPLDevice_GetMaxNumSubGroups (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::max_num_sub_groups>();
     }
     return 0;
 }

+bool
+DPPLDevice_HasInt64BaseAtomics (__dppl_keep const DPPLSyclDeviceRef DRef)
+{
+    auto D = unwrap(DRef);
+    if (D) {
+        return D->has(aspect::int64_base_atomics);
+    }
+    return false;
+}
+
+bool
+DPPLDevice_HasInt64ExtendedAtomics (__dppl_keep const DPPLSyclDeviceRef DRef)
+{
+    auto D = unwrap(DRef);
+    if (D) {
+        return D->has(aspect::int64_extended_atomics);
+    }
+    return false;
+}
+
 __dppl_give const
 char* DPPLDevice_GetName (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto name = unwrap(DRef)->get_info<info::device::name>();
+    if (D) {
+        auto name = D->get_info<info::device::name>();
         auto cstr_name = new char [name.length()+1];
         std::strcpy (cstr_name, name.c_str());
         return cstr_name;
@@ -213,8 +231,8 @@ __dppl_give const
 char* DPPLDevice_GetVendorName (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto vendor = unwrap(DRef)->get_info<info::device::vendor>();
+    if (D) {
+        auto vendor = D->get_info<info::device::vendor>();
         auto cstr_vendor = new char [vendor.length()+1];
         std::strcpy (cstr_vendor, vendor.c_str());
         return cstr_vendor;
@@ -226,8 +244,8 @@ __dppl_give const
 char* DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
-        auto driver = unwrap(DRef)->get_info<info::device::driver_version>();
+    if (D) {
+        auto driver = D->get_info<info::device::driver_version>();
         auto cstr_driver = new char [driver.length()+1];
         std::strcpy (cstr_driver, driver.c_str());
         return cstr_driver;
@@ -238,7 +256,7 @@ DPPLDevice_GetDriverInfo (__dppl_keep const DPPLSyclDeviceRef DRef)
 bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef)
 {
     auto D = unwrap(DRef);
-    if(D) {
+    if (D) {
         return D->get_info<info::device::host_unified_memory>();
     }
     return false;
diff --git a/backends/source/dppl_sycl_event_interface.cpp b/backends/source/dppl_sycl_event_interface.cpp
index 69a739483f..ae8356adba 100644
--- a/backends/source/dppl_sycl_event_interface.cpp
+++ b/backends/source/dppl_sycl_event_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_event_interface.cpp - DPPL-SYCL interface --*- C++ -*---===//
+//===------ dppl_sycl_event_interface.cpp - dpctl-C_API ---*--- C++ --*---===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
diff --git a/backends/source/dppl_sycl_kernel_interface.cpp b/backends/source/dppl_sycl_kernel_interface.cpp
index 56fa9380cb..e030974bc3 100644
--- a/backends/source/dppl_sycl_kernel_interface.cpp
+++ b/backends/source/dppl_sycl_kernel_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_kernel_interface.cpp - DPPL-SYCL interface --*-- C++ -*-===//
+//===------ dppl_sycl_kernel_interface.cpp - dpctl-C_API ---*--- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
diff --git a/backends/source/dppl_sycl_platform_interface.cpp b/backends/source/dppl_sycl_platform_interface.cpp
index 4fff1d0bfd..2aa0af7ed4 100644
--- a/backends/source/dppl_sycl_platform_interface.cpp
+++ b/backends/source/dppl_sycl_platform_interface.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_platform_interface.cpp - DPPL-SYCL interface --*- C++ -*-===//
+//===------ dppl_sycl_platform_interface.cpp - dpctl-C_API --*-- C++ --*--===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -36,27 +36,26 @@ using namespace cl::sycl;

 namespace
 {
-std::set<DPPLSyclBEType>
-get_set_of_backends ()
+std::set<DPPLSyclBackendType>
+get_set_of_non_hostbackends ()
 {
-    std::set<DPPLSyclBEType> be_set;
+    std::set<DPPLSyclBackendType> be_set;
     for (auto p : platform::get_platforms()) {
         if(p.is_host())
-            continue;
+            continue;
         auto be = p.get_backend();
         switch (be)
         {
         case backend::host:
-            be_set.insert(DPPLSyclBEType::DPPL_HOST);
             break;
         case backend::cuda:
-            be_set.insert(DPPLSyclBEType::DPPL_CUDA);
+            be_set.insert(DPPLSyclBackendType::DPPL_CUDA);
             break;
         case backend::level_zero:
-            be_set.insert(DPPLSyclBEType::DPPL_LEVEL_ZERO);
+
be_set.insert(DPPLSyclBackendType::DPPL_LEVEL_ZERO); break; case backend::opencl: - be_set.insert(DPPLSyclBEType::DPPL_OPENCL); + be_set.insert(DPPLSyclBackendType::DPPL_OPENCL); break; default: break; @@ -128,22 +127,22 @@ void DPPLPlatform_DumpInfo () switch (devTy) { case info::device_type::cpu: - ss << "cpu" << '\n'; - break; + ss << "cpu" << '\n'; + break; case info::device_type::gpu: - ss << "gpu" << '\n'; - break; + ss << "gpu" << '\n'; + break; case info::device_type::accelerator: - ss << "accelerator" << '\n'; - break; + ss << "accelerator" << '\n'; + break; case info::device_type::custom: - ss << "custom" << '\n'; - break; + ss << "custom" << '\n'; + break; case info::device_type::host: - ss << "host" << '\n'; - break; + ss << "host" << '\n'; + break; default: - ss << "unknown" << '\n'; + ss << "unknown" << '\n'; } } std::cout << ss.str(); @@ -154,24 +153,30 @@ void DPPLPlatform_DumpInfo () /*! * Returns the number of sycl::platform on the system. */ -size_t DPPLPlatform_GetNumPlatforms () +size_t DPPLPlatform_GetNumNonHostPlatforms () { - return platform::get_platforms().size(); + auto nNonHostPlatforms = 0ul; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + ++nNonHostPlatforms; + } + return nNonHostPlatforms; } -size_t DPPLPlatform_GetNumBackends () +size_t DPPLPlatform_GetNumNonHostBackends () { - return get_set_of_backends().size(); + return get_set_of_non_hostbackends().size(); } -__dppl_give enum DPPLSyclBEType *DPPLPlatform_GetListOfBackends () +__dppl_give DPPLSyclBackendType *DPPLPlatform_GetListOfNonHostBackends () { - auto be_set = get_set_of_backends(); + auto be_set = get_set_of_non_hostbackends(); if (be_set.empty()) return nullptr; - DPPLSyclBEType *BEArr = new DPPLSyclBEType[be_set.size()]; + DPPLSyclBackendType *BEArr = new DPPLSyclBackendType[be_set.size()]; auto i = 0ul; for (auto be : be_set) { @@ -182,7 +187,7 @@ __dppl_give enum DPPLSyclBEType *DPPLPlatform_GetListOfBackends () return BEArr; } -void DPPLPlatform_DeleteListOfBackends (__dppl_take enum DPPLSyclBEType *BEArr) +void DPPLPlatform_DeleteListOfBackends (__dppl_take DPPLSyclBackendType *BEArr) { delete[] BEArr; } diff --git a/backends/source/dppl_sycl_program_interface.cpp b/backends/source/dppl_sycl_program_interface.cpp index 01d615a08b..edb8429c3f 100644 --- a/backends/source/dppl_sycl_program_interface.cpp +++ b/backends/source/dppl_sycl_program_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_program_interface.cpp - DPPL-SYCL interface --*-- C++ -*-===// +//===----- dppl_sycl_program_interface.cpp - dpctl-C_API ---*--- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/source/dppl_sycl_queue_interface.cpp b/backends/source/dppl_sycl_queue_interface.cpp index 2121a2292c..0231df8cf8 100644 --- a/backends/source/dppl_sycl_queue_interface.cpp +++ b/backends/source/dppl_sycl_queue_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_queue_interface.cpp - DPPL-SYCL interface --*- C++ -*---===// +//===------ dppl_sycl_queue_interface.cpp - dpctl-C_API ---*--- C++ --*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -27,6 +27,8 @@ #include "dppl_sycl_queue_interface.h" #include "dppl_sycl_context_interface.h" #include "Support/CBindingWrapping.h" +#include +#include #include /* SYCL headers */ @@ -72,6 +74,9 @@ bool set_kernel_arg (handler &cgh, 
size_t idx, __dppl_keep void *Arg,
     case DPPL_UNSIGNED_INT:
         cgh.set_arg(idx, *(unsigned int*)Arg);
         break;
+    case DPPL_UNSIGNED_INT8:
+        cgh.set_arg(idx, *(uint8_t*)Arg);
+        break;
     case DPPL_LONG:
         cgh.set_arg(idx, *(long*)Arg);
         break;
@@ -128,11 +133,18 @@ bool DPPLQueue_AreEq (__dppl_keep const DPPLSyclQueueRef QRef1,
     return (*unwrap(QRef1) == *unwrap(QRef2));
 }

-enum DPPLSyclBEType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef)
+DPPLSyclBackendType DPPLQueue_GetBackend (__dppl_keep DPPLSyclQueueRef QRef)
 {
     auto Q = unwrap(QRef);
-    auto C = Q->get_context();
-    return DPPLContext_GetBackend(wrap(&C));
+    try {
+        auto C = Q->get_context();
+        return DPPLContext_GetBackend(wrap(&C));
+    }
+    catch (runtime_error &re) {
+        std::cerr << re.what() << '\n';
+        // store error message
+        return DPPL_UNKNOWN_BACKEND;
+    }
 }

 __dppl_give DPPLSyclDeviceRef
@@ -197,11 +209,11 @@ DPPLQueue_SubmitRange (__dppl_keep const DPPLSyclKernelRef KRef,
                                          "dimensions.");
             }
         });
-    } catch (runtime_error re) {
+    } catch (runtime_error &re) {
         // \todo fix error handling
         std::cerr << re.what() << '\n';
         return nullptr;
-    } catch (std::runtime_error sre) {
+    } catch (std::runtime_error &sre) {
         std::cerr << sre.what() << '\n';
         return nullptr;
     }
@@ -258,11 +270,11 @@ DPPLQueue_SubmitNDRange(__dppl_keep const DPPLSyclKernelRef KRef,
                                          "dimensions.");
             }
         });
-    } catch (runtime_error re) {
+    } catch (runtime_error &re) {
         // \todo fix error handling
         std::cerr << re.what() << '\n';
         return nullptr;
-    } catch (std::runtime_error sre) {
+    } catch (std::runtime_error &sre) {
         std::cerr << sre.what() << '\n';
         return nullptr;
     }
diff --git a/backends/source/dppl_sycl_queue_manager.cpp b/backends/source/dppl_sycl_queue_manager.cpp
index ffe5abcff0..f708b4aea2 100644
--- a/backends/source/dppl_sycl_queue_manager.cpp
+++ b/backends/source/dppl_sycl_queue_manager.cpp
@@ -1,6 +1,6 @@
-//===--- dppl_sycl_queue_manager.cpp - DPPL-SYCL interface --*- C++ -*---===//
+//===--------- dppl_sycl_queue_manager.cpp - dpctl-C_API --*-- C++ ---*---===//
 //
-// Python Data Parallel Processing Library (PyDPPL)
+// Data Parallel Control Library (dpCtl)
 //
 // Copyright 2020 Intel Corporation
 //
@@ -25,7 +25,6 @@
 //===----------------------------------------------------------------------===//
 #include "dppl_sycl_queue_manager.h"
 #include "Support/CBindingWrapping.h"
-#include
 #include
 #include

@@ -54,7 +53,8 @@ class QMgrHelper
 public:
     using QVec = vector_class<queue>;

-    static QVec* init_queues (backend BE, info::device_type DTy) {
+    static QVec* init_queues (backend BE, info::device_type DTy)
+    {
         QVec *queues = new QVec();
         auto Platforms = platform::get_platforms();
         for (auto &p : Platforms) {
@@ -65,26 +65,72 @@ class QMgrHelper
             if (Devices.size() == 1) {
                 auto d = Devices[0];
                 auto devty = d.get_info<info::device::device_type>();
-                auto Ctx = context(d);
                 if(devty == DTy && be == BE) {
-                    queues->emplace_back(Ctx, d);
+                    auto Ctx = context(d);
+                    queues->emplace_back(Ctx, d);
                     break;
                 }
             }
             else {
-                auto Ctx = context(Devices);
+                vector_class<device> SelectedDevices;
                 for(auto &d : Devices) {
                     auto devty = d.get_info<info::device::device_type>();
                     if(devty == DTy && be == BE) {
-                        queues->emplace_back(Ctx, d);
+                        SelectedDevices.push_back(d);
+
+                        // Workaround: in some environments get_devices()
+                        // returns each device TWICE, and the context
+                        // constructor then fails when given the duplicated
+                        // device list. So use only the first matching device.
break; } } + if (SelectedDevices.size() > 0) { + auto Ctx = context(SelectedDevices); + auto d = SelectedDevices[0]; + queues->emplace_back(Ctx, d); + } } } return queues; } + static QVec* init_active_queues () + { + QVec *active_queues; + try { + auto def_device = std::move(default_selector().select_device()); + auto BE = def_device.get_platform().get_backend(); + auto DevTy = def_device.get_info(); + + // \todo : We need to have a better way to match the default device + // to what SYCL returns based on the same scoring logic. Just + // storing the first device is not correct when we will have + // multiple devices of same type. + if(BE == backend::opencl && + DevTy == info::device_type::cpu) { + active_queues = new QVec({get_opencl_cpu_queues()[0]}); + } + else if(BE == backend::opencl && + DevTy == info::device_type::gpu) { + active_queues = new QVec({get_opencl_gpu_queues()[0]}); + } + else if(BE == backend::level_zero && + DevTy == info::device_type::gpu) { + active_queues = new QVec({get_level0_gpu_queues()[0]}); + } + else { + active_queues = new QVec(); + } + } + catch (runtime_error &re) { + // \todo Handle the error + active_queues = new QVec(); + } + + return active_queues; + } + static QVec& get_opencl_cpu_queues () { static QVec* queues = init_queues(backend::opencl, @@ -108,14 +154,13 @@ class QMgrHelper static QVec& get_active_queues () { - thread_local static QVec* active_queues = - new QVec({default_selector()}); + thread_local static QVec *active_queues = init_active_queues(); return *active_queues; } static __dppl_give DPPLSyclQueueRef - getQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static __dppl_give DPPLSyclQueueRef @@ -124,13 +169,13 @@ class QMgrHelper static bool isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef); static __dppl_give DPPLSyclQueueRef - setAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static __dppl_give DPPLSyclQueueRef - pushSyclQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, + pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum); static void @@ -146,7 +191,7 @@ class QMgrHelper */ DPPLSyclQueueRef QMgrHelper::getCurrentQueue () { - auto activated_q = get_active_queues(); + auto &activated_q = get_active_queues(); if(activated_q.empty()) { // \todo handle error std::cerr << "No currently active queues.\n"; @@ -163,15 +208,15 @@ DPPLSyclQueueRef QMgrHelper::getCurrentQueue () * be used for that purpose. 
*/ __dppl_give DPPLSyclQueueRef -QMgrHelper::getQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::getQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { auto cpuQs = get_opencl_cpu_queues(); if (DNum >= cpuQs.size()) { @@ -183,7 +228,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, QRef = new queue(cpuQs[DNum]); break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { auto gpuQs = get_opencl_gpu_queues(); if (DNum >= gpuQs.size()) { @@ -195,7 +240,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, QRef = new queue(gpuQs[DNum]); break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { auto l0GpuQs = get_level0_gpu_queues(); if (DNum >= l0GpuQs.size()) { @@ -222,7 +267,7 @@ QMgrHelper::getQueue (enum DPPLSyclBEType BETy, */ bool QMgrHelper::isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) { - auto activated_q = get_active_queues(); + auto &activated_q = get_active_queues(); if(activated_q.empty()) { // \todo handle error std::cerr << "No currently active queues.\n"; @@ -238,8 +283,8 @@ bool QMgrHelper::isCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) * sycl::queue corresponding to the device type and device number. */ __dppl_give DPPLSyclQueueRef -QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::setAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; @@ -251,7 +296,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { auto oclcpu_q = get_opencl_cpu_queues(); if (DNum >= oclcpu_q.size()) { @@ -263,7 +308,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, activeQ[0] = oclcpu_q[DNum]; break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { auto oclgpu_q = get_opencl_gpu_queues(); if (DNum >= oclgpu_q.size()) { @@ -275,7 +320,7 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, activeQ[0] = oclgpu_q[DNum]; break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { auto l0gpu_q = get_level0_gpu_queues(); if (DNum >= l0gpu_q.size()) { @@ -305,8 +350,8 @@ QMgrHelper::setAsDefaultQueue (enum DPPLSyclBEType BETy, * purpose. 
*/ __dppl_give DPPLSyclQueueRef -QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +QMgrHelper::pushSyclQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { queue *QRef = nullptr; @@ -318,7 +363,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { if (DNum >= get_opencl_cpu_queues().size()) { // \todo handle error @@ -330,7 +375,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, QRef = new queue(activeQ[activeQ.size()-1]); break; } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { if (DNum >= get_opencl_gpu_queues().size()) { // \todo handle error @@ -342,7 +387,7 @@ QMgrHelper::pushSyclQueue (enum DPPLSyclBEType BETy, QRef = new queue(activeQ[get_active_queues().size()-1]); break; } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { if (DNum >= get_level0_gpu_queues().size()) { // \todo handle error @@ -405,20 +450,20 @@ size_t DPPLQueueMgr_GetNumActivatedQueues () * Returns the number of available queues for a specific backend and device * type combination. */ -size_t DPPLQueueMgr_GetNumQueues (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy) +size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy) { switch (BETy|DeviceTy) { - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_CPU: { return QMgrHelper::get_opencl_cpu_queues().size(); } - case DPPLSyclBEType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_OPENCL | DPPLSyclDeviceType::DPPL_GPU: { return QMgrHelper::get_opencl_gpu_queues().size(); } - case DPPLSyclBEType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: + case DPPLSyclBackendType::DPPL_LEVEL_ZERO | DPPLSyclDeviceType::DPPL_GPU: { return QMgrHelper::get_level0_gpu_queues().size(); } @@ -443,8 +488,8 @@ DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () * Returns a copy of a sycl::queue corresponding to the specified device type * and device number. A runtime_error gets thrown if no such device exists. */ -DPPLSyclQueueRef DPPLQueueMgr_GetQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::getQueue(BETy, DeviceTy, DNum); @@ -464,8 +509,8 @@ bool DPPLQueueMgr_IsCurrentQueue (__dppl_keep const DPPLSyclQueueRef QRef) * device, Null is returned. 
*/ __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_SetAsDefaultQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::setAsDefaultQueue(BETy, DeviceTy, DNum); @@ -475,8 +520,8 @@ DPPLQueueMgr_SetAsDefaultQueue (enum DPPLSyclBEType BETy, * \see QMgrHelper::pushSyclQueue() */ __dppl_give DPPLSyclQueueRef -DPPLQueueMgr_PushQueue (enum DPPLSyclBEType BETy, - enum DPPLSyclDeviceType DeviceTy, +DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, + DPPLSyclDeviceType DeviceTy, size_t DNum) { return QMgrHelper::pushSyclQueue(BETy, DeviceTy, DNum); diff --git a/backends/source/dppl_sycl_usm_interface.cpp b/backends/source/dppl_sycl_usm_interface.cpp index 54ff95efc6..959398f462 100644 --- a/backends/source/dppl_sycl_usm_interface.cpp +++ b/backends/source/dppl_sycl_usm_interface.cpp @@ -1,6 +1,6 @@ -//===--- dppl_sycl_usm_interface.cpp - DPPL-SYCL interface --*- C++ -*---===// +//===------- dppl_sycl_usm_interface.cpp - dpctl-C_API ---*--- C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -73,10 +73,10 @@ void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef, } void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef, - __dppl_keep const DPPLSyclContextRef СRef) + __dppl_keep const DPPLSyclContextRef CRef) { auto Ptr = unwrap(MRef); - auto C = unwrap(СRef); + auto C = unwrap(CRef); free(Ptr, *C); } diff --git a/backends/source/dppl_utils.cpp b/backends/source/dppl_utils.cpp index f18bd94f78..6468809070 100644 --- a/backends/source/dppl_utils.cpp +++ b/backends/source/dppl_utils.cpp @@ -1,6 +1,6 @@ -//===--------- dppl_utils.cpp - DPPL-SYCL interface ----*---- C++ ----*----===// +//===-------------- dppl_utils.cpp - dpctl-C_API ----*---- C++ -----*-----===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -33,4 +33,4 @@ void DPPLCString_Delete (__dppl_take const char* str) void DPPLSize_t_Array_Delete (__dppl_take size_t* arr) { delete[] arr; -} \ No newline at end of file +} diff --git a/backends/tests/CMakeLists.txt b/backends/tests/CMakeLists.txt index 86233ef7d5..d0efc5ebfe 100644 --- a/backends/tests/CMakeLists.txt +++ b/backends/tests/CMakeLists.txt @@ -3,9 +3,9 @@ string(COMPARE EQUAL "${GTEST_LIB_DIR}" "" no_gtest_lib_dir) if(${no_gtest_incl_dir} OR ${no_gtest_lib_dir}) message(WARNING - "GTest is needed to test PyDPPL's backend test cases. Pass in \ + "GTest is needed to test dpCtl's C API test cases. Pass in \ -DGTEST_INCLUDE_DIR and -DGTEST_LIB_DIR when you configure Cmake if\ - you wish to run PyDPPL backend tests." + you wish to run dpCtl backend tests." 
) else() # We need thread support for gtest @@ -22,7 +22,7 @@ else() link_directories(${GTEST_LIB_DIR}) - set(PYDPPL_BACKEND_TEST_CASES + set(DPCTL_C_API_TEST_CASES test_sycl_device_interface test_sycl_kernel_interface test_sycl_platform_interface @@ -39,7 +39,7 @@ else() file(COPY ${tf} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) endforeach() - foreach(TEST_CASE ${PYDPPL_BACKEND_TEST_CASES}) + foreach(TEST_CASE ${DPCTL_C_API_TEST_CASES}) add_executable(${TEST_CASE} EXCLUDE_FROM_ALL ${TEST_CASE}.cpp) target_link_libraries( ${TEST_CASE} ${CMAKE_THREAD_LIBS_INIT} gtest DPPLSyclInterface diff --git a/backends/tests/test_sycl_device_interface.cpp b/backends/tests/test_sycl_device_interface.cpp index 4f209d5b4f..43da260e39 100644 --- a/backends/tests/test_sycl_device_interface.cpp +++ b/backends/tests/test_sycl_device_interface.cpp @@ -1,4 +1,4 @@ -//===----- test_sycl_device_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===----- test_sycl_device_interface.cpp - dpctl-C_API interface -*- C++ -*-===// // // Python Data Parallel Processing Library (PyDPPL) // @@ -20,7 +20,7 @@ /// /// \file /// This file has unit test cases for functions defined in -/// dppl_sycl_kernel_interface.h. +/// dppl_sycl_device_interface.h. /// //===----------------------------------------------------------------------===// @@ -39,6 +39,7 @@ struct TestDPPLSyclDeviceInterface : public ::testing::Test { DPPLSyclDeviceRef OpenCL_cpu = nullptr; DPPLSyclDeviceRef OpenCL_gpu = nullptr; + DPPLSyclDeviceRef OpenCL_Level0_gpu = nullptr; TestDPPLSyclDeviceInterface () { @@ -53,12 +54,19 @@ struct TestDPPLSyclDeviceInterface : public ::testing::Test OpenCL_gpu = DPPLQueue_GetDevice(Q); DPPLQueue_Delete(Q); } + + if(DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU)) { + auto Q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); + OpenCL_Level0_gpu = DPPLQueue_GetDevice(Q); + DPPLQueue_Delete(Q); + } } ~TestDPPLSyclDeviceInterface () { DPPLDevice_Delete(OpenCL_cpu); DPPLDevice_Delete(OpenCL_gpu); + DPPLDevice_Delete(OpenCL_Level0_gpu); } }; @@ -83,22 +91,41 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetDriverInfo) DPPLCString_Delete(DriverInfo); } -TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxComputeUnites) +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetDriverInfo) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto DriverInfo = DPPLDevice_GetDriverInfo(OpenCL_Level0_gpu); + EXPECT_TRUE(DriverInfo != nullptr); + DPPLCString_Delete(DriverInfo); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxComputeUnits) { if(!OpenCL_cpu) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); - auto n = DPPLDevice_GetMaxComputeUnites(OpenCL_cpu); - EXPECT_TRUE(n != 0); + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_cpu); + EXPECT_TRUE(n > 0); } -TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxComputeUnites) +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxComputeUnits) { if(!OpenCL_gpu) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); - auto n = DPPLDevice_GetMaxComputeUnites(OpenCL_gpu); - EXPECT_TRUE(n != 0); + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_gpu); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxComputeUnits) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxComputeUnits(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemDims) @@ -107,7 +134,7 @@ TEST_F 
(TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemDims) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemDims) @@ -116,26 +143,55 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemDims) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkItemDims) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxWorkItemDims(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkItemSizes) { if(!OpenCL_cpu) - GTEST_SKIP_("Skipping as no OpenCL GPU device found."); + GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_cpu); EXPECT_TRUE(item_sizes != nullptr); DPPLSize_t_Array_Delete(item_sizes); } +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkItemSizes) +{ + if(!OpenCL_gpu) + GTEST_SKIP_("Skipping as no OpenCL GPU device found."); + + auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_gpu); + EXPECT_TRUE(item_sizes != nullptr); + DPPLSize_t_Array_Delete(item_sizes); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkItemSizes) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto item_sizes = DPPLDevice_GetMaxWorkItemSizes(OpenCL_Level0_gpu); + EXPECT_TRUE(item_sizes != nullptr); + DPPLSize_t_Array_Delete(item_sizes); +} + TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxWorkGroupSize) { if(!OpenCL_cpu) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkGroupSize) @@ -144,7 +200,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxWorkGroupSize) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxWorkGroupSize) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxWorkGroupSize(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxNumSubGroups) @@ -153,7 +218,7 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetMaxNumSubGroups) GTEST_SKIP_("Skipping as no OpenCL CPU device found."); auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_cpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); } TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxNumSubGroups) @@ -162,7 +227,88 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetMaxNumSubGroups) GTEST_SKIP_("Skipping as no OpenCL GPU device found."); auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_gpu); - EXPECT_TRUE(n != 0); + EXPECT_TRUE(n > 0); +} + +TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetMaxNumSubGroups) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + auto n = DPPLDevice_GetMaxNumSubGroups(OpenCL_Level0_gpu); + EXPECT_TRUE(n > 0); +} + +//TODO: Update when DPC++ properly supports aspects +TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_HasInt64BaseAtomics) +{ + 
if(!OpenCL_cpu)
+        GTEST_SKIP_("Skipping as no OpenCL CPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_cpu);
+    auto D = reinterpret_cast<device*>(OpenCL_cpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_HasInt64BaseAtomics)
+{
+    if(!OpenCL_gpu)
+        GTEST_SKIP_("Skipping as no OpenCL GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_gpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_HasInt64BaseAtomics)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64BaseAtomics(OpenCL_Level0_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_Level0_gpu);
+    auto has_atomics= D->has(aspect::int64_base_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_cpu)
+        GTEST_SKIP_("Skipping as no OpenCL CPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_cpu);
+    auto D = reinterpret_cast<device*>(OpenCL_cpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_gpu)
+        GTEST_SKIP_("Skipping as no OpenCL GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_gpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
+}
+
+//TODO: Update when DPC++ properly supports aspects
+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_HasInt64ExtendedAtomics)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto atomics = DPPLDevice_HasInt64ExtendedAtomics(OpenCL_Level0_gpu);
+    auto D = reinterpret_cast<device*>(OpenCL_Level0_gpu);
+    auto has_atomics= D->has(aspect::int64_extended_atomics);
+    EXPECT_TRUE(has_atomics == atomics);
 }

 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetName)
@@ -185,6 +331,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetName)
     DPPLCString_Delete(DevName);
 }

+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetName)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto DevName = DPPLDevice_GetName(OpenCL_Level0_gpu);
+    EXPECT_TRUE(DevName != nullptr);
+    DPPLCString_Delete(DevName);
+}
+
 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_GetVendorName)
 {
     if(!OpenCL_cpu)
@@ -205,6 +361,16 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_GetVendorName)
     DPPLCString_Delete(VendorName);
 }

+TEST_F (TestDPPLSyclDeviceInterface, CheckLevel0GPU_GetVendorName)
+{
+    if(!OpenCL_Level0_gpu)
+        GTEST_SKIP_("Skipping as no Level0 GPU device found.");
+
+    auto VendorName = DPPLDevice_GetVendorName(OpenCL_Level0_gpu);
+    EXPECT_TRUE(VendorName != nullptr);
+    DPPLCString_Delete(VendorName);
+}
+
 TEST_F (TestDPPLSyclDeviceInterface, CheckOCLCPU_IsCPU)
 {
     if(!OpenCL_cpu)
@@ -221,10 +387,18 @@ TEST_F (TestDPPLSyclDeviceInterface, CheckOCLGPU_IsGPU)
     EXPECT_TRUE(DPPLDevice_IsGPU(OpenCL_gpu));
 }

+TEST_F
(TestDPPLSyclDeviceInterface, CheckLevel0GPU_IsGPU) +{ + if(!OpenCL_Level0_gpu) + GTEST_SKIP_("Skipping as no Level0 GPU device found."); + + EXPECT_TRUE(DPPLDevice_IsGPU(OpenCL_Level0_gpu)); +} + int main (int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); int ret = RUN_ALL_TESTS(); return ret; -} \ No newline at end of file +} diff --git a/backends/tests/test_sycl_kernel_interface.cpp b/backends/tests/test_sycl_kernel_interface.cpp index b07592c202..8ef4d8d951 100644 --- a/backends/tests/test_sycl_kernel_interface.cpp +++ b/backends/tests/test_sycl_kernel_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_program_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===-------- test_sycl_program_interface.cpp - dpctl-C_API -*- C++ ---*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/tests/test_sycl_platform_interface.cpp b/backends/tests/test_sycl_platform_interface.cpp index 06891cf6f0..fed815e9bf 100644 --- a/backends/tests/test_sycl_platform_interface.cpp +++ b/backends/tests/test_sycl_platform_interface.cpp @@ -1,6 +1,6 @@ -//===--- test_sycl_platform_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===------- test_sycl_platform_interface.cpp - dpctl-C_API --*-- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -31,26 +31,31 @@ struct TestDPPLSyclPlatformInterface : public ::testing::Test TEST_F (TestDPPLSyclPlatformInterface, CheckGetNumPlatforms) { - auto nplatforms = DPPLPlatform_GetNumPlatforms(); + auto nplatforms = DPPLPlatform_GetNumNonHostPlatforms(); EXPECT_GE(nplatforms, 0); } TEST_F (TestDPPLSyclPlatformInterface, GetNumBackends) { - auto nbackends = DPPLPlatform_GetNumBackends(); + auto nbackends = DPPLPlatform_GetNumNonHostBackends(); EXPECT_GE(nbackends, 0); } TEST_F (TestDPPLSyclPlatformInterface, GetListOfBackends) { - auto nbackends = DPPLPlatform_GetNumBackends(); - auto backends = DPPLPlatform_GetListOfBackends(); - EXPECT_TRUE(backends != nullptr); + auto nbackends = DPPLPlatform_GetNumNonHostBackends(); + + if(!nbackends) + GTEST_SKIP_("No non host backends available"); + + auto backends = DPPLPlatform_GetListOfNonHostBackends(); + EXPECT_TRUE(backends != nullptr); for(auto i = 0ul; i < nbackends; ++i) { EXPECT_TRUE( - backends[i] == DPPLSyclBEType::DPPL_CUDA || - backends[i] == DPPLSyclBEType::DPPL_OPENCL || - backends[i] == DPPLSyclBEType::DPPL_LEVEL_ZERO); + backends[i] == DPPLSyclBackendType::DPPL_CUDA || + backends[i] == DPPLSyclBackendType::DPPL_OPENCL || + backends[i] == DPPLSyclBackendType::DPPL_LEVEL_ZERO + ); } DPPLPlatform_DeleteListOfBackends(backends); } diff --git a/backends/tests/test_sycl_program_interface.cpp b/backends/tests/test_sycl_program_interface.cpp index 027a81f6c3..b32f7b7ab5 100644 --- a/backends/tests/test_sycl_program_interface.cpp +++ b/backends/tests/test_sycl_program_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_program_interface.cpp - DPPL-SYCL interface -*- C++ -*-===// +//===---------- test_sycl_program_interface.cpp - dpctl-C_API --*-- C++ -*-===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // diff --git a/backends/tests/test_sycl_queue_interface.cpp b/backends/tests/test_sycl_queue_interface.cpp index 0a1711e622..2d68bdcdec 100644 --- a/backends/tests/test_sycl_queue_interface.cpp 
+++ b/backends/tests/test_sycl_queue_interface.cpp @@ -1,6 +1,6 @@ -//===---- test_sycl_queue_interface.cpp - DPPL-SYCL interface -*- C++ --*--===// +//===-------- test_sycl_queue_interface.cpp - dpctl-C_API ---*--- C++ --*--===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -31,32 +31,48 @@ #include "dppl_sycl_queue_interface.h" #include "dppl_sycl_queue_manager.h" #include "dppl_sycl_usm_interface.h" - #include "Support/CBindingWrapping.h" - +#include #include +using namespace cl::sycl; + namespace { - constexpr size_t SIZE = 1024; +constexpr size_t SIZE = 1024; - DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef); +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef); - void add_kernel_checker (const float *a, const float *b, const float *c) - { - // Validate the data - for(auto i = 0ul; i < SIZE; ++i) { - EXPECT_EQ(c[i], a[i] + b[i]); - } +void add_kernel_checker (const float *a, const float *b, const float *c) +{ + // Validate the data + for(auto i = 0ul; i < SIZE; ++i) { + EXPECT_EQ(c[i], a[i] + b[i]); } +} - void axpy_kernel_checker (const float *a, const float *b, const float *c, - float d) - { - for(auto i = 0ul; i < SIZE; ++i) { - EXPECT_EQ(c[i], a[i] + d*b[i]); +void axpy_kernel_checker (const float *a, const float *b, const float *c, + float d) +{ + for(auto i = 0ul; i < SIZE; ++i) { + EXPECT_EQ(c[i], a[i] + d*b[i]); + } +} + +bool has_devices () +{ + bool ret = false; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + if(!p.get_devices().empty()) { + ret = true; + break; } } + return ret; +} + } struct TestDPPLSyclQueueInterface : public ::testing::Test @@ -89,30 +105,30 @@ struct TestDPPLSyclQueueInterface : public ::testing::Test TEST_F (TestDPPLSyclQueueInterface, CheckAreEq) { - auto Q1 = DPPLQueueMgr_GetCurrentQueue(); - auto Q2 = DPPLQueueMgr_GetCurrentQueue(); - EXPECT_TRUE(DPPLQueue_AreEq(Q1, Q2)); + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); - auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBEType::DPPL_OPENCL, + auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU); - auto nOclCPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBEType::DPPL_OPENCL, - DPPLSyclDeviceType::DPPL_CPU); - { if(!nOclGPU) - GTEST_SKIP_("No OpenCL GPUs available.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPUs available.\n"); + + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); + auto Q2 = DPPLQueueMgr_GetCurrentQueue(); + EXPECT_TRUE(DPPLQueue_AreEq(Q1, Q2)); auto Def_Q = DPPLQueueMgr_SetAsDefaultQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto OclGPU_Q0 = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto OclGPU_Q1 = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); @@ -124,30 +140,39 @@ TEST_F (TestDPPLSyclQueueInterface, CheckAreEq) DPPLQueue_Delete(OclGPU_Q1); DPPLQueueMgr_PopQueue(); DPPLQueueMgr_PopQueue(); - } +} + +TEST_F (TestDPPLSyclQueueInterface, CheckAreEq2) +{ + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); - { + auto nOclGPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_GPU); + auto nOclCPU = DPPLQueueMgr_GetNumQueues(DPPLSyclBackendType::DPPL_OPENCL, + DPPLSyclDeviceType::DPPL_CPU); if(!nOclGPU || !nOclCPU) 
GTEST_SKIP_("OpenCL GPUs and CPU not available.\n"); auto GPU_Q = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_GPU, 0 ); auto CPU_Q = DPPLQueueMgr_PushQueue( - DPPLSyclBEType::DPPL_OPENCL, + DPPLSyclBackendType::DPPL_OPENCL, DPPLSyclDeviceType::DPPL_CPU, 0 ); EXPECT_FALSE(DPPLQueue_AreEq(GPU_Q, CPU_Q)); DPPLQueueMgr_PopQueue(); DPPLQueueMgr_PopQueue(); - } - } TEST_F (TestDPPLSyclQueueInterface, CheckGetBackend) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto BE = DPPLQueue_GetBackend(Q1); EXPECT_TRUE((BE == DPPL_OPENCL) || @@ -178,6 +203,9 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetBackend) TEST_F (TestDPPLSyclQueueInterface, CheckGetContext) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto Ctx = DPPLQueue_GetContext(Q1); ASSERT_TRUE(Ctx != nullptr); @@ -212,6 +240,9 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetContext) TEST_F (TestDPPLSyclQueueInterface, CheckGetDevice) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto Q1 = DPPLQueueMgr_GetCurrentQueue(); auto D = DPPLQueue_GetDevice(Q1); ASSERT_TRUE(D != nullptr); @@ -249,10 +280,13 @@ TEST_F (TestDPPLSyclQueueInterface, CheckGetDevice) TEST_F (TestDPPLSyclQueueInterface, CheckSubmit) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); if(!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device.\n"); auto Queue = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); auto CtxRef = DPPLQueue_GetContext(Queue); diff --git a/backends/tests/test_sycl_queue_manager.cpp b/backends/tests/test_sycl_queue_manager.cpp index 675fc01c4c..55f8cb725d 100644 --- a/backends/tests/test_sycl_queue_manager.cpp +++ b/backends/tests/test_sycl_queue_manager.cpp @@ -1,6 +1,6 @@ -//===--- test_sycl_queue_manager.cpp - DPPL-SYCL interface --*- C++ ---*---===// +//===------- test_sycl_queue_manager.cpp - dpctl-C_API ---*--- C++ ----*---===// // -// Python Data Parallel Processing Library (PyDPPL) +// Data Parallel Control Library (dpCtl) // // Copyright 2020 Intel Corporation // @@ -37,26 +37,41 @@ using namespace cl::sycl; namespace { - void foo (size_t & num) - { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); - auto q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - // Capture the number of active queues in first - num = DPPLQueueMgr_GetNumActivatedQueues(); - DPPLQueueMgr_PopQueue(); - DPPLQueueMgr_PopQueue(); - DPPLQueue_Delete(q1); - DPPLQueue_Delete(q2); - } +void foo (size_t & num) +{ + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + auto q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + // Capture the number of active queues in first + num = DPPLQueueMgr_GetNumActivatedQueues(); + DPPLQueueMgr_PopQueue(); + DPPLQueueMgr_PopQueue(); + DPPLQueue_Delete(q1); + DPPLQueue_Delete(q2); +} - void bar (size_t & num) - { - auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - // Capture the number of active queues in second - num = DPPLQueueMgr_GetNumActivatedQueues(); - DPPLQueueMgr_PopQueue(); - DPPLQueue_Delete(q1); +void bar (size_t & num) +{ + auto q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + // Capture the number of active queues in second + num = DPPLQueueMgr_GetNumActivatedQueues(); + DPPLQueueMgr_PopQueue(); + 
DPPLQueue_Delete(q1); +} + +bool has_devices () +{ + bool ret = false; + for (auto &p : platform::get_platforms()) { + if (p.is_host()) + continue; + if(!p.get_devices().empty()) { + ret = true; + break; + } } + return ret; +} + } struct TestDPPLSyclQueueManager : public ::testing::Test @@ -65,6 +80,9 @@ struct TestDPPLSyclQueueManager : public ::testing::Test TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetCurrentQueue) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + DPPLSyclQueueRef q; ASSERT_NO_THROW(q = DPPLQueueMgr_GetCurrentQueue()); ASSERT_TRUE(q != nullptr); @@ -73,9 +91,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetCurrentQueue) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLCpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); if(!nOpenCLCpuQ) - GTEST_SKIP_("Skipping as no OpenCL CPU device found."); + GTEST_SKIP_("Skipping: No OpenCL CPU device found."); auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_CPU, 0); EXPECT_TRUE(q != nullptr); @@ -94,9 +115,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLCpuQ) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLGpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nOpenCLGpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU); if(!nOpenCLGpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); auto q = DPPLQueueMgr_GetQueue(DPPL_OPENCL, DPPL_GPU, 0); EXPECT_TRUE(q != nullptr); @@ -115,9 +139,12 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetOpenCLGpuQ) TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetLevel0GpuQ) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + auto nL0GpuQ = DPPLQueueMgr_GetNumQueues(DPPL_LEVEL_ZERO, DPPL_GPU); if(!nL0GpuQ) - GTEST_SKIP_("Skipping as no OpenCL GPU device found.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU device found.\n"); auto q = DPPLQueueMgr_GetQueue(DPPL_LEVEL_ZERO, DPPL_GPU, 0); EXPECT_TRUE(q != nullptr); @@ -136,6 +163,9 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLGetLevel0GpuQ) TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); + size_t num0, num1, num2, num4; auto nOpenCLCpuQ = DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU); @@ -172,6 +202,8 @@ TEST_F (TestDPPLSyclQueueManager, CheckGetNumActivatedQueues) TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); auto q = DPPLQueueMgr_GetCurrentQueue(); EXPECT_NO_FATAL_FAILURE(DPPLDevice_DumpInfo(DPPLQueue_GetDevice(q))); EXPECT_NO_FATAL_FAILURE(DPPLQueue_Delete(q)); @@ -179,20 +211,39 @@ TEST_F (TestDPPLSyclQueueManager, CheckDPPLDumpDeviceInfo) TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue) { + if(!has_devices()) + GTEST_SKIP_("Skipping: No Sycl devices.\n"); if(!DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) - GTEST_SKIP_("No OpenCL GPU.\n"); + GTEST_SKIP_("Skipping: No OpenCL GPU.\n"); auto Q0 = DPPLQueueMgr_GetCurrentQueue(); EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); - auto Q = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); - EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q)); - EXPECT_FALSE(DPPLQueueMgr_IsCurrentQueue(Q0)); - DPPLQueue_Delete(Q); + auto Q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q1); DPPLQueueMgr_PopQueue(); 
EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q0)); DPPLQueue_Delete(Q0); } +TEST_F (TestDPPLSyclQueueManager, CheckIsCurrentQueue2) +{ + if(!DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_CPU) || + !DPPLQueueMgr_GetNumQueues(DPPL_OPENCL, DPPL_GPU)) + GTEST_SKIP_("Skipping: No OpenCL GPU and OpenCL CPU.\n"); + + auto Q1 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_GPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + auto Q2 = DPPLQueueMgr_PushQueue(DPPL_OPENCL, DPPL_CPU, 0); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q2)); + EXPECT_FALSE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q2); + DPPLQueueMgr_PopQueue(); + EXPECT_TRUE(DPPLQueueMgr_IsCurrentQueue(Q1)); + DPPLQueue_Delete(Q1); + DPPLQueueMgr_PopQueue(); +} + int main (int argc, char** argv) { diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index a951f8a1f6..1d811447a8 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -3,7 +3,7 @@ IF ERRORLEVEL 1 exit 1 REM conda uses %ERRORLEVEL% but FPGA scripts can set it. So it should be reseted. set ERRORLEVEL= -set "CC=dpcpp.exe" +set "CC=clang-cl.exe" set "CXX=dpcpp.exe" rmdir /S /Q build_cmake @@ -17,7 +17,7 @@ rmdir /S /Q "%INSTALL_PREFIX%" cmake -G Ninja ^ -DCMAKE_BUILD_TYPE=Release ^ - "-DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX%" ^ + "-DCMAKE_INSTALL_PREFIX=%INSTALL_PREFIX%" ^ "-DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX%" ^ "-DDPCPP_ROOT=%DPCPP_ROOT%" ^ "%SRC_DIR%/backends" @@ -29,17 +29,12 @@ IF %ERRORLEVEL% NEQ 0 exit 1 cd .. xcopy install\lib\*.lib dpctl /E /Y -xcopy install\lib\*.dll dpctl /E /Y +xcopy install\bin\*.dll dpctl /E /Y mkdir dpctl\include xcopy backends\include dpctl\include /E /Y -REM required by _opencl_core (dpctl.ocldrv) -set "DPPL_OPENCL_INTERFACE_LIBDIR=dpctl" -set "DPPL_OPENCL_INTERFACE_INCLDIR=dpctl\include" -set "OpenCL_LIBDIR=%DPCPP_ROOT%\lib" - REM required by _sycl_core(dpctl) set "DPPL_SYCL_INTERFACE_LIBDIR=dpctl" set "DPPL_SYCL_INTERFACE_INCLDIR=dpctl\include" diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 72c28d711d..607da35268 100755 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -40,10 +40,6 @@ cp install/lib/*.so dpctl/ mkdir -p dpctl/include cp -r backends/include/* dpctl/include -# required by dpctl.opencl_core -export DPPL_OPENCL_INTERFACE_LIBDIR=dpctl -export DPPL_OPENCL_INTERFACE_INCLDIR=dpctl/include -export OpenCL_LIBDIR=${DPCPP_ROOT}/lib # required by dpctl.sycl_core export DPPL_SYCL_INTERFACE_LIBDIR=dpctl @@ -53,6 +49,5 @@ export DPPL_SYCL_INTERFACE_INCLDIR=dpctl/include # FIXME: How to pass this using setup.py? This flags is needed when # dpcpp compiles the generated cpp file. export CFLAGS="-fPIC -O3 ${CFLAGS}" -export LDFLAGS="-L ${OpenCL_LIBDIR} ${LDFLAGS}" ${PYTHON} setup.py clean --all ${PYTHON} setup.py build install diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 48ff634421..acf485898a 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -17,7 +17,6 @@ requirements: - {{ compiler('cxx') }} host: - setuptools - - cffi >=1.0.0 - cython - cmake - python @@ -27,10 +26,9 @@ requirements: run: - python - numpy >=1.17 - - cffi >=1.0.0 about: - home: https://github.com/IntelPython/PyDPPL.git + home: https://github.com/IntelPython/dpCtl.git license: Apache-2.0 license_file: LICENSE summary: 'A lightweight Python wrapper for a subset of OpenCL and SYCL API.' 
diff --git a/conda-recipe/run_test.bat b/conda-recipe/run_test.bat index 992277b497..ed3a395aec 100644 --- a/conda-recipe/run_test.bat +++ b/conda-recipe/run_test.bat @@ -8,8 +8,5 @@ set ERRORLEVEL= "%PYTHON%" -c "import dpctl" IF %ERRORLEVEL% NEQ 0 exit 1 -"%PYTHON%" -c "import dpctl.ocldrv" -IF %ERRORLEVEL% NEQ 0 exit 1 - "%PYTHON%" -m unittest -v dpctl.tests IF %ERRORLEVEL% NEQ 0 exit 1 diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 775783ce50..ff46be6632 100644 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -6,5 +6,4 @@ set -e source ${ONEAPI_ROOT}/compiler/latest/env/vars.sh || true ${PYTHON} -c "import dpctl" -${PYTHON} -c "import dpctl.ocldrv" ${PYTHON} -m unittest -v dpctl.tests diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 333867f89c..af9aa93076 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -22,7 +22,7 @@ ## This top-level dpctl module. ## ##===----------------------------------------------------------------------===## -''' +""" Data Parallel Control (dpCtl) dpCtl provides a lightweight Python abstraction over DPC++/SYCL and @@ -33,7 +33,7 @@ dpCtl's intended usage is as a common SYCL interoperability layer for different Python libraries and applications. The OpenCL support inside - PyDPPL is slated to be deprecated and then removed in future releases + dpCtl is slated to be deprecated and then removed in future releases of the library. Currently, only a small subset of DPC++ runtime objects are exposed @@ -43,12 +43,13 @@ Please use `pydoc dpctl.ocldrv` to look at the current API for dpctl.ocldrv. -''' +""" __author__ = "Intel Corp." from ._sycl_core import * from ._version import get_versions + def get_include(): """ Return the directory that contains the dpCtl *.h header files. @@ -57,7 +58,9 @@ def get_include(): this function to locate the appropriate include directory. 
""" import os.path - return os.path.join(os.path.dirname(__file__), 'include') -__version__ = get_versions()['version'] + return os.path.join(os.path.dirname(__file__), "include") + + +__version__ = get_versions()["version"] del get_versions diff --git a/dpctl/backend.pxd b/dpctl/_backend.pxd similarity index 82% rename from dpctl/backend.pxd rename to dpctl/_backend.pxd index c7cecae87a..584ae79fd8 100644 --- a/dpctl/backend.pxd +++ b/dpctl/_backend.pxd @@ -28,20 +28,22 @@ # cython: language_level=3 from libcpp cimport bool +from libc.stdint cimport uint32_t cdef extern from "dppl_utils.h": cdef void DPPLCString_Delete (const char *str) + cdef void DPPLSize_t_Array_Delete (size_t *arr) cdef extern from "dppl_sycl_enum_types.h": - cdef enum _backend_type 'DPPLSyclBEType': + cdef enum _backend_type 'DPPLSyclBackendType': _OPENCL 'DPPL_OPENCL' _HOST 'DPPL_HOST' _LEVEL_ZERO 'DPPL_LEVEL_ZERO' _CUDA 'DPPL_CUDA' _UNKNOWN_BACKEND 'DPPL_UNKNOWN_BACKEND' - ctypedef _backend_type DPPLSyclBEType + ctypedef _backend_type DPPLSyclBackendType cdef enum _device_type 'DPPLSyclDeviceType': _GPU 'DPPL_GPU' @@ -57,7 +59,8 @@ cdef extern from "dppl_sycl_enum_types.h": _UNSIGNED_CHAR 'DPPL_UNSIGNED_CHAR', _SHORT 'DPPL_SHORT', _INT 'DPPL_INT', - _UNSIGNED_INT 'DPPL_INT', + _UNSIGNED_INT 'DPPL_UNSIGNED_INT', + _UNSIGNED_INT8 'DPPL_UNSIGNED_INT8', _LONG 'DPPL_LONG', _UNSIGNED_LONG 'DPPL_UNSIGNED_LONG', _LONG_LONG 'DPPL_LONG_LONG', @@ -96,10 +99,17 @@ cdef extern from "dppl_sycl_device_interface.h": cdef bool DPPLDevice_IsCPU (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsGPU (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsHost (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) - cdef const char* DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetDriverInfo (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetName (const DPPLSyclDeviceRef DRef) + cpdef const char *DPPLDevice_GetVendorName (const DPPLSyclDeviceRef DRef) cdef bool DPPLDevice_IsHostUnifiedMemory (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxComputeUnits (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxWorkItemDims (const DPPLSyclDeviceRef DRef) + cpdef size_t *DPPLDevice_GetMaxWorkItemSizes (const DPPLSyclDeviceRef DRef) + cpdef size_t DPPLDevice_GetMaxWorkGroupSize (const DPPLSyclDeviceRef DRef) + cpdef uint32_t DPPLDevice_GetMaxNumSubGroups (const DPPLSyclDeviceRef DRef) + cpdef bool DPPLDevice_HasInt64BaseAtomics (const DPPLSyclDeviceRef DRef) + cpdef bool DPPLDevice_HasInt64ExtendedAtomics (const DPPLSyclDeviceRef DRef) cdef extern from "dppl_sycl_event_interface.h": @@ -114,17 +124,18 @@ cdef extern from "dppl_sycl_kernel_interface.h": cdef extern from "dppl_sycl_platform_interface.h": - cdef size_t DPPLPlatform_GetNumPlatforms () + cdef size_t DPPLPlatform_GetNumNonHostPlatforms () cdef void DPPLPlatform_DumpInfo () - cdef size_t DPPLPlatform_GetNumBackends () - cdef DPPLSyclBEType *DPPLPlatform_GetListOfBackends () - cdef void DPPLPlatform_DeleteListOfBackends (DPPLSyclBEType * BEs) + cdef size_t DPPLPlatform_GetNumNonHostBackends () + cdef DPPLSyclBackendType *DPPLPlatform_GetListOfNonHostBackends () + cdef void DPPLPlatform_DeleteListOfBackends (DPPLSyclBackendType * BEs) cdef extern from "dppl_sycl_context_interface.h": cdef bool DPPLContext_AreEq (const DPPLSyclContextRef CtxRef1, const DPPLSyclContextRef CtxRef2) - cdef 
DPPLSyclBEType DPPLContext_GetBackend (const DPPLSyclContextRef CtxRef) + cdef DPPLSyclBackendType DPPLContext_GetBackend ( + const DPPLSyclContextRef CtxRef) cdef void DPPLContext_Delete (DPPLSyclContextRef CtxRef) @@ -148,7 +159,7 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef bool DPPLQueue_AreEq (const DPPLSyclQueueRef QRef1, const DPPLSyclQueueRef QRef2) cdef void DPPLQueue_Delete (DPPLSyclQueueRef QRef) - cdef DPPLSyclBEType DPPLQueue_GetBackend (const DPPLSyclQueueRef Q) + cdef DPPLSyclBackendType DPPLQueue_GetBackend (const DPPLSyclQueueRef Q) cdef DPPLSyclContextRef DPPLQueue_GetContext (const DPPLSyclQueueRef Q) cdef DPPLSyclDeviceRef DPPLQueue_GetDevice (const DPPLSyclQueueRef Q) cdef DPPLSyclEventRef DPPLQueue_SubmitRange ( @@ -179,19 +190,19 @@ cdef extern from "dppl_sycl_queue_interface.h": cdef extern from "dppl_sycl_queue_manager.h": cdef DPPLSyclQueueRef DPPLQueueMgr_GetCurrentQueue () - cdef size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBEType BETy, + cdef size_t DPPLQueueMgr_GetNumQueues (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy) cdef size_t DPPLQueueMgr_GetNumActivatedQueues () - cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBEType BETy, + cdef DPPLSyclQueueRef DPPLQueueMgr_GetQueue (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum) cdef bool DPPLQueueMgr_IsCurrentQueue (const DPPLSyclQueueRef QRef) cdef void DPPLQueueMgr_PopQueue () - cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (DPPLSyclBEType BETy, + cdef DPPLSyclQueueRef DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum) cdef DPPLSyclQueueRef DPPLQueueMgr_SetAsDefaultQueue ( - DPPLSyclBEType BETy, + DPPLSyclBackendType BETy, DPPLSyclDeviceType DeviceTy, size_t DNum ) diff --git a/dpctl/_memory.pxd b/dpctl/_memory.pxd index 3c868125ca..2ab5066c8d 100644 --- a/dpctl/_memory.pxd +++ b/dpctl/_memory.pxd @@ -21,7 +21,7 @@ # distutils: language = c++ # cython: language_level=3 -from .backend cimport DPPLSyclUSMRef +from ._backend cimport DPPLSyclUSMRef from ._sycl_core cimport SyclQueue diff --git a/dpctl/_memory.pyx b/dpctl/_memory.pyx index 1671d3bb7c..96259b0451 100644 --- a/dpctl/_memory.pyx +++ b/dpctl/_memory.pyx @@ -29,7 +29,7 @@ # cython: language_level=3 import dpctl -from dpctl.backend cimport * +from dpctl._backend cimport * from ._sycl_core cimport SyclContext, SyclQueue from cpython cimport Py_buffer diff --git a/dpctl/_sycl_core.pxd b/dpctl/_sycl_core.pxd index a95e5f28c5..89a74dca57 100644 --- a/dpctl/_sycl_core.pxd +++ b/dpctl/_sycl_core.pxd @@ -27,7 +27,8 @@ # distutils: language = c++ # cython: language_level=3 -from .backend cimport * +from ._backend cimport * +from libc.stdint cimport uint32_t cdef class SyclContext: @@ -48,10 +49,28 @@ cdef class SyclDevice: cdef const char *_vendor_name cdef const char *_device_name cdef const char *_driver_version + cdef uint32_t _max_compute_units + cdef uint32_t _max_work_item_dims + cdef size_t *_max_work_item_sizes + cdef size_t _max_work_group_size + cdef uint32_t _max_num_sub_groups + cdef bool _int64_base_atomics + cdef bool _int64_extended_atomics @staticmethod cdef SyclDevice _create (DPPLSyclDeviceRef dref) cdef DPPLSyclDeviceRef get_device_ref (self) + cpdef get_device_name (self) + cpdef get_device_type (self) + cpdef get_vendor_name (self) + cpdef get_driver_version (self) + cpdef get_max_compute_units (self) + cpdef get_max_work_item_dims (self) + cpdef get_max_work_item_sizes (self) + cpdef get_max_work_group_size (self) + cpdef get_max_num_sub_groups (self) + cpdef 
has_int64_base_atomics (self) + cpdef has_int64_extended_atomics (self) cdef class SyclEvent: @@ -118,3 +137,6 @@ cdef class SyclQueue: cpdef void wait (self) cdef DPPLSyclQueueRef get_queue_ref (self) cpdef memcpy (self, dest, src, int count) + + +cpdef SyclQueue get_current_queue() diff --git a/dpctl/sycl_core.pyx b/dpctl/_sycl_core.pyx similarity index 82% rename from dpctl/sycl_core.pyx rename to dpctl/_sycl_core.pyx index ea1916fc46..129728894a 100644 --- a/dpctl/sycl_core.pyx +++ b/dpctl/_sycl_core.pyx @@ -29,7 +29,7 @@ from __future__ import print_function from enum import Enum, auto import logging -from .backend cimport * +from ._backend cimport * from ._memory cimport Memory from libc.stdlib cimport malloc, free @@ -105,6 +105,15 @@ cdef class SyclContext: cdef DPPLSyclContextRef get_context_ref (self): return self._ctxt_ref + def addressof_ref (self): + """Returns the address of the DPPLSyclContextRef pointer as a + long. + + Returns: + The address of the DPPLSyclContextRef object used to create this + SyclContext cast to a long. + """ + return int(self._ctx_ref) cdef class SyclDevice: ''' Wrapper class for a Sycl Device @@ -117,6 +126,13 @@ cdef class SyclDevice: ret._vendor_name = DPPLDevice_GetVendorName(dref) ret._device_name = DPPLDevice_GetName(dref) ret._driver_version = DPPLDevice_GetDriverInfo(dref) + ret._max_compute_units = DPPLDevice_GetMaxComputeUnits(dref) + ret._max_work_item_dims = DPPLDevice_GetMaxWorkItemDims(dref) + ret._max_work_item_sizes = DPPLDevice_GetMaxWorkItemSizes(dref) + ret._max_work_group_size = DPPLDevice_GetMaxWorkGroupSize(dref) + ret._max_num_sub_groups = DPPLDevice_GetMaxNumSubGroups(dref) + ret._int64_base_atomics = DPPLDevice_HasInt64BaseAtomics(dref) + ret._int64_extended_atomics = DPPLDevice_HasInt64ExtendedAtomics(dref) return ret def __dealloc__ (self): @@ -124,18 +140,19 @@ cdef class SyclDevice: DPPLCString_Delete(self._device_name) DPPLCString_Delete(self._vendor_name) DPPLCString_Delete(self._driver_version) + DPPLSize_t_Array_Delete(self._max_work_item_sizes) def dump_device_info (self): ''' Print information about the SYCL device. ''' DPPLDevice_DumpInfo(self._device_ref) - def get_device_name (self): + cpdef get_device_name (self): ''' Returns the name of the device as a string ''' return self._device_name.decode() - def get_device_type (self): + cpdef get_device_type (self): ''' Returns the type of the device as a `device_type` enum ''' if DPPLDevice_IsGPU(self._device_ref): @@ -145,12 +162,12 @@ cdef class SyclDevice: else: raise ValueError("Unknown device type.") - def get_vendor_name (self): + cpdef get_vendor_name (self): ''' Returns the device vendor name as a string ''' return self._vendor_name.decode() - def get_driver_version (self): + cpdef get_driver_version (self): ''' Returns the OpenCL software driver version as a string in the form: major number.minor number, if this SYCL device is an OpenCL device. Returns a string class @@ -158,11 +175,72 @@ cdef class SyclDevice: ''' return self._driver_version.decode() + cpdef has_int64_base_atomics (self): + ''' Returns true if device has int64_base_atomics else returns false. + ''' + return self._int64_base_atomics + + cpdef has_int64_extended_atomics (self): + ''' Returns true if device has int64_extended_atomics else returns false. + ''' + return self._int64_extended_atomics + + cpdef get_max_compute_units (self): + ''' Returns the number of parallel compute units + available to the device. The minimum value is 1. 
+ ''' + return self._max_compute_units + + cpdef get_max_work_item_dims (self): + ''' Returns the maximum dimensions that specify + the global and local work-item IDs used by the + data parallel execution model. The minimum + value is 3 if this SYCL device is not of device + type info::device_type::custom. + ''' + return self._max_work_item_dims + + cpdef get_max_work_item_sizes (self): + ''' Returns the maximum number of work-items + that are permitted in each dimension of the + work-group of the nd_range. The minimum + value is (1; 1; 1) for devices that are not of + device type info::device_type::custom. + ''' + max_work_item_sizes = [] + for n in range(3): + max_work_item_sizes.append(self._max_work_item_sizes[n]) + return tuple(max_work_item_sizes) + + cpdef get_max_work_group_size (self): + ''' Returns the maximum number of work-items + that are permitted in a work-group executing a + kernel on a single compute unit. The minimum + value is 1. + ''' + return self._max_work_group_size + + cpdef get_max_num_sub_groups (self): + ''' Returns the maximum number of sub-groups + in a work-group for any kernel executed on the + device. The minimum value is 1. + ''' + return self._max_num_sub_groups + cdef DPPLSyclDeviceRef get_device_ref (self): ''' Returns the DPPLSyclDeviceRef pointer for this class. ''' return self._device_ref + def addressof_ref (self): + """Returns the address of the DPPLSyclDeviceRef pointer as a + long. + + Returns: + The address of the DPPLSyclDeviceRef object used to create this + SyclDevice cast to a long. + """ + return int(self._device_ref) cdef class SyclEvent: ''' Wrapper class for a Sycl Event @@ -187,6 +265,16 @@ cdef class SyclEvent: cpdef void wait (self): DPPLEvent_Wait(self._event_ref) + def addressof_ref (self): + """Returns the address of the C API DPPLSyclEventRef pointer as + a long. + + Returns: + The address of the DPPLSyclEventRef object used to create this + SyclEvent cast to a long. + """ + return int(self._event_ref) + cdef class SyclKernel: ''' Wraps a sycl::kernel object created from an OpenCL interoperability @@ -219,6 +307,15 @@ cdef class SyclKernel: ''' return self._kernel_ref + def addressof_ref (self): + """Returns the address of the C API DPPLSyclKernelRef pointer + as a long. + + Returns: + The address of the DPPLSyclKernelRef object used to create this + SyclKernel cast to a long. + """ + return int(self._kernel_ref) cdef class SyclProgram: ''' Wraps a sycl::program object created from an OpenCL interoperability @@ -250,6 +347,16 @@ cdef class SyclProgram: name = kernel_name.encode('utf8') return DPPLProgram_HasKernel(self._program_ref, name) + def addressof_ref (self): + """Returns the address of the C API DPPLSyclProgramRef pointer + as a long. + + Returns: + The address of the DPPLSyclProgramRef object used to create this + SyclProgram cast to a long. 
+ """ + return int(self._program_ref) + import ctypes cdef class SyclQueue: @@ -258,6 +365,8 @@ cdef class SyclQueue: @staticmethod cdef SyclQueue _create (DPPLSyclQueueRef qref): + if qref is NULL: + raise SyclQueueCreationError("Queue creation failed.") cdef SyclQueue ret = SyclQueue.__new__(SyclQueue) ret._context = SyclContext._create(DPPLQueue_GetContext(qref)) ret._device = SyclDevice._create(DPPLQueue_GetDevice(qref)) @@ -294,9 +403,15 @@ cdef class SyclQueue: elif isinstance(arg, ctypes.c_uint): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._UNSIGNED_INT + elif isinstance(arg, ctypes.c_uint8): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_INT8 elif isinstance(arg, ctypes.c_long): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG + elif isinstance(arg, ctypes.c_ulong): + kargs[idx] = (ctypes.addressof(arg)) + kargty[idx] = _arg_data_type._UNSIGNED_LONG elif isinstance(arg, ctypes.c_longlong): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._LONG_LONG @@ -353,7 +468,7 @@ cdef class SyclQueue: def get_sycl_backend (self): """ Returns the Sycl bakend associated with the queue. """ - cdef DPPLSyclBEType BE = DPPLQueue_GetBackend(self._queue_ref) + cdef DPPLSyclBackendType BE = DPPLQueue_GetBackend(self._queue_ref) if BE == _backend_type._OPENCL: return backend_type.opencl elif BE == _backend_type._LEVEL_ZERO: @@ -374,6 +489,16 @@ cdef class SyclQueue: cdef DPPLSyclQueueRef get_queue_ref (self): return self._queue_ref + def addressof_ref (self): + """Returns the address of the C API DPPLSyclQueueRef pointer as + a long. + + Returns: + The address of the DPPLSyclQueueRef object used to create this + SyclQueue cast to a long. + """ + return int(self._queue_ref) + cpdef SyclEvent submit (self, SyclKernel kernel, list args, list gS, \ list lS = None, list dEvents = None): @@ -523,15 +648,6 @@ cdef class _SyclRTManager: device_type.gpu : _device_type._GPU, } - cdef _raise_queue_creation_error (self, str be, str dev, int devid, fname): - e = SyclQueueCreationError( - "Queue creation failed for :", be, dev, devid - ) - e.fname = fname - e.code = -1 - raise e - - def _set_as_current_queue (self, backend_ty, device_ty, device_id): cdef DPPLSyclQueueRef queue_ref @@ -540,11 +656,6 @@ cdef class _SyclRTManager: try : devTy = self._device_str_ty_dict[device_ty] queue_ref = DPPLQueueMgr_PushQueue(beTy, devTy, device_id) - if queue_ref is NULL: - self._raise_queue_creation_error( - backend_ty, device_ty, device_id, - "DPPLQueueMgr_PushQueue" - ) return SyclQueue._create(queue_ref) except KeyError: raise UnsupportedDeviceError("Device can only be gpu or cpu") @@ -563,7 +674,7 @@ cdef class _SyclRTManager: def print_available_backends (self): """ Prints the available backends. """ - print(self._backend_ty_dict.keys()) + print(self._backend_str_ty_dict.keys()) def get_current_backend (self): """ Returns the backend for the current queue as `backend_type` enum @@ -575,7 +686,7 @@ cdef class _SyclRTManager: ''' return self.get_current_queue().get_sycl_device().get_device_type() - def get_current_queue (self): + cpdef SyclQueue get_current_queue (self): ''' Returns the activated SYCL queue as a PyCapsule. ''' return SyclQueue._create(DPPLQueueMgr_GetCurrentQueue()) @@ -586,9 +697,9 @@ cdef class _SyclRTManager: return DPPLQueueMgr_GetNumActivatedQueues() def get_num_platforms (self): - ''' Returns the number of available SYCL/OpenCL platforms. + ''' Returns the number of available non-host SYCL platforms. 
''' - return DPPLPlatform_GetNumPlatforms() + return DPPLPlatform_GetNumNonHostPlatforms() def get_num_queues (self, backend_ty, device_ty): cdef size_t num = 0 @@ -640,7 +751,7 @@ cdef class _SyclRTManager: return False def has_sycl_platforms (self): - cdef size_t num_platforms = DPPLPlatform_GetNumPlatforms() + cdef size_t num_platforms = DPPLPlatform_GetNumNonHostPlatforms() if num_platforms: return True else: @@ -656,9 +767,15 @@ cdef class _SyclRTManager: def set_default_queue (self, backend_ty, device_ty, device_id): cdef DPPLSyclQueueRef ret try : - beTy = self._backend_ty_dict[backend_ty] + if isinstance(backend_ty, str): + beTy = self._backend_str_ty_dict[backend_ty] + else: + beTy = self._backend_enum_ty_dict[backend_ty] try : - devTy = self._device_ty_dict[device_ty] + if isinstance(device_ty, str): + devTy = self._device_str_ty_dict[device_ty] + else: + devTyp = self._device_enum_ty_dist[device_ty] ret = DPPLQueueMgr_SetAsDefaultQueue(beTy, devTy, device_id) if ret is NULL: self._raise_queue_creation_error( @@ -679,7 +796,6 @@ _mgr = _SyclRTManager() # Global bound functions dump = _mgr.dump -get_current_queue = _mgr.get_current_queue get_current_device_type = _mgr.get_current_device_type get_num_platforms = _mgr.get_num_platforms get_num_activated_queues = _mgr.get_num_activated_queues @@ -690,6 +806,10 @@ has_sycl_platforms = _mgr.has_sycl_platforms set_default_queue = _mgr.set_default_queue is_in_device_context = _mgr.is_in_device_context +cpdef SyclQueue get_current_queue(): + ''' Obtain current Sycl Queue from Data Parallel Control package ''' + return _mgr.get_current_queue() + def create_program_from_source (SyclQueue q, unicode source, unicode copts=""): ''' Creates a Sycl interoperability program from an OpenCL source string. @@ -776,6 +896,7 @@ def device_context (str queue_str="opencl:gpu:0"): # calling get_current_context, or use the returned context object directly. # If set_context is unable to create a new context an exception is raised. + ctxt = None try: attrs = queue_str.split(':') nattrs = len(attrs) @@ -786,7 +907,6 @@ def device_context (str queue_str="opencl:gpu:0"): "device_number defaults to 0") if nattrs == 2: attrs.append("0") - ctxt = None ctxt = _mgr._set_as_current_queue(attrs[0], attrs[1], int(attrs[2])) yield ctxt finally: diff --git a/dpctl/_version.py b/dpctl/_version.py index 39363ca3aa..395c950102 100644 --- a/dpctl/_version.py +++ b/dpctl/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build @@ -58,17 +57,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -76,10 +76,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -116,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -181,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +199,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -198,19 +207,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -225,8 +241,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -234,10 +249,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -260,17 +284,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -279,10 +302,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -293,13 +318,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -330,8 +355,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -445,11 +469,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -469,9 +495,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -485,8 +515,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -495,13 +524,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -515,6 +547,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/dpctl/ocldrv.py b/dpctl/ocldrv.py deleted file mode 100644 index 87af22cbd7..0000000000 --- a/dpctl/ocldrv.py +++ /dev/null @@ -1,709 +0,0 @@ -##===------------- ocldrv.py - dpctl.ocldrv module ------*- Python -*------===## -## -## Data Parallel Control (dpCtl) -## -## Copyright 2020 Intel Corporation -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. -## -##===----------------------------------------------------------------------===## -### -### \file -### This file exposes Python classes for different OpenCL classes that are -### exposed by the _opencl_core CFFI extension module. -##===----------------------------------------------------------------------===## -''' The dpctl.ocldrv module contains a set of Python wrapper classes for - OpenCL objects. The module has wrappers for cl_context, cl_device, - cl_mem, cl_program, and cl_kernel objects. - - The two main user-visible API classes are Runtime, DeviceArray, and - DeviceEnv and Runtime. The other classes are only used by the Numba - JIT compiler. - - Global data members: - runtime - An instance of the Runtime class. - has_cpu_device - A flag set to True when an OpenCL CPU device is found - on the system. - has_cpu_device - A flag set to True when an OpenCL GPU device is found - on the system. - -''' - -from __future__ import absolute_import, division, print_function - -from contextlib import contextmanager -import ctypes -import logging - -from numpy import ndarray - -from ._opencl_core import ffi, lib - - -__author__ = "Intel Corp." - -_logger = logging.getLogger(__name__) - -# create console handler and set level to debug -_ch = logging.StreamHandler() -_ch.setLevel(logging.WARNING) -# create formatter -_formatter = logging.Formatter('DPPL-%(levelname)s - %(message)s') -# add formatter to ch -_ch.setFormatter(_formatter) -# add ch to logger -_logger.addHandler(_ch) - - -########################################################################## -# Exception classes -########################################################################## - - -class DpplDriverError(Exception): - """ The exception is raised when dpctl.ocldrv cannot find an OpenCL Driver. 
- """ - pass - - -class DeviceNotFoundError(Exception): - """ The exception is raised when the requested type of OpenCL device is - not available or not supported by dpctl.ocldrv. - """ - pass - - -class UnsupportedTypeError(Exception): - """ The exception is raised when an unsupported type is encountered when - creating an OpenCL KernelArg. Only DeviceArray or numpy.ndarray types - are supported. - """ - pass - - -########################################################################## -# Helper functions -########################################################################## - - -def _raise_driver_error(fname, errcode): - e = DpplDriverError("Could not find an OpenCL Driver. Ensure OpenCL \ - driver is installed.") - e.fname = fname - e.code = errcode - raise e - - -def _raise_device_not_found_error(fname): - e = DeviceNotFoundError("OpenCL device not available on the system.") - e.fname = fname - raise e - - -def _raise_unsupported_type_error(fname): - e = UnsupportedTypeError("Type needs to be DeviceArray or numpy.ndarray.") - e.fname = fname - raise e - - -def _raise_unsupported_kernel_arg_error(fname): - e = (UnsupportedTypeError("Type needs to be DeviceArray or a supported " - "ctypes type.")) - e.fname = fname - raise e - - -def _is_supported_ctypes_raw_obj(obj): - return isinstance(obj, (ctypes.c_ssize_t, - ctypes.c_double, - ctypes.c_float, - ctypes.c_uint8, - ctypes.c_size_t)) - -########################################################################## -# DeviceArray class -########################################################################## - - -class DeviceArray: - ''' A Python wrapper for an OpenCL cl_men buffer with read-write access. A - DeviceArray can only be created from a NumPy ndarray. - ''' - _buffObj = None - _ndarray = None - _buffSize = None - _dataPtr = None - - def __init__(self, env_ptr, arr): - ''' Creates a new DeviceArray from an ndarray. - - Note that DeviceArray creation only allocates the cl_mem buffer - and does not actually move the data to the device. Data copy from - host to device is done when the DeviceArray instance is passed as - an argument to DeviceEnv.copy_array_to_device(). - ''' - - # We only support device buffers for ndarray and ctypes (for basic - # types like int, etc) - if not isinstance(arr, ndarray): - _raise_unsupported_type_error("DeviceArray constructor") - - # create a dp_buffer_t object - self._buffObj = ffi.new("buffer_t *") - self._ndarray = arr - self._buffSize = arr.itemsize * arr.size - self._dataPtr = ffi.cast("void *", arr.ctypes.data) - retval = (lib.create_dp_rw_mem_buffer(env_ptr, - self._buffSize, - self._buffObj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_rw_mem_buffer", -1) - - def __del__(self): - ''' Destroy the DeviceArray and release the OpenCL buffer.''' - - retval = (lib.destroy_dp_rw_mem_buffer(self._buffObj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_rw_mem_buffer", -1) - - def get_buffer_obj(self): - ''' Returns a cdata wrapper object encapsulating an OpenCL buffer. - ''' - - return self._buffObj - - def get_buffer_size(self): - ''' Returns the size of the OpenCL buffer in bytes. - ''' - - return self._buffSize - - def get_buffer_ptr(self): - ''' Returns a cdata wrapper over the actual OpenCL cl_mem pointer. 
- ''' - - return self.get_buffer_obj()[0].buffer_ptr - - def get_data_ptr(self): - ''' Returns the data pointer for the NumPy ndarray used to create - the DeviceArray object. - ''' - - return self._dataPtr - - def get_ndarray(self): - ''' Returns the NumPy ndarray used to create the DeviceArray object. - ''' - - return self._ndarray - -########################################################################## -# Program class -########################################################################## - - -class Program(): - - def __init__(self, device_env, spirv_module): - self._prog_t_obj = ffi.new("program_t *") - retval = (lib.create_dp_program_from_spirv(device_env.get_env_ptr(), - spirv_module, - len(spirv_module), - self._prog_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error( - "create_dp_program_from_spirv", -1) - - retval = (lib.build_dp_program(device_env.get_env_ptr(), - self._prog_t_obj[0])) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("build_dp_program", -1) - - def __del__(self): - retval = (lib.destroy_dp_program(self._prog_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_program", -1) - - def get_prog_t_obj(self): - return self._prog_t_obj[0] - - -########################################################################## -# Kernel class -########################################################################## - - -class Kernel(): - - def __init__(self, device_env, prog_t_obj, kernel_name): - self._kernel_t_obj = ffi.new("kernel_t *") - retval = (lib.create_dp_kernel(device_env.get_env_ptr(), - prog_t_obj.get_prog_t_obj(), - kernel_name.encode(), - self._kernel_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_kernel", -1) - - def __del__(self): - retval = (lib.destroy_dp_kernel(self._kernel_t_obj)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_kernel", -1) - - def get_kernel_t_obj(self): - return self._kernel_t_obj[0] - - def dump(self): - retval = self._kernel_t_obj.dump_fn(self._kernel_t_obj) - if retval == -1: - _raise_driver_error("kernel dump_fn", -1) - -########################################################################## -# KernelArg class -########################################################################## - - -class KernelArg(): - - def __init__(self, arg, void_p_arg=False): - self.arg = arg - self.kernel_arg_t = ffi.new("kernel_arg_t *") - if void_p_arg is True: - self.ptr_to_arg_p = ffi.new("void **") - self.ptr_to_arg_p[0] = ffi.cast("void *", 0) - retval = (lib.create_dp_kernel_arg(self.ptr_to_arg_p, - ffi.sizeof(self.ptr_to_arg_p), - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - if isinstance(arg, DeviceArray): - self.ptr_to_arg_p = ffi.new("void **") - self.ptr_to_arg_p[0] = arg.get_buffer_obj()[0].buffer_ptr - retval = (lib.create_dp_kernel_arg( - self.ptr_to_arg_p, - arg.get_buffer_obj()[0].sizeof_buffer_ptr, - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - # it has to be of type ctypes - if getattr(arg, '__module__', None) == "ctypes": - self.ptr_to_arg_p = ffi.cast("void *", - ctypes.addressof(arg)) - retval = (lib.create_dp_kernel_arg(self.ptr_to_arg_p, - ctypes.sizeof(arg), - self.kernel_arg_t)) - if(retval): - _raise_driver_error("create_dp_kernel_arg", -1) - else: - 
_logger.warning("Unsupported Type %s", type(arg)) - _raise_unsupported_kernel_arg_error("KernelArg init") - - def __del__(self): - retval = (lib.destroy_dp_kernel_arg(self.kernel_arg_t)) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("destroy_dp_kernel_arg", -1) - - def get_kernel_arg_obj(self): - return self.kernel_arg_t[0] - - -########################################################################## -# DeviceEnv class -########################################################################## - - -class DeviceEnv(): - ''' A Python wrapper over an OpenCL cl_context object. - ''' - - def __init__(self, env_t_obj): - self._env_ptr = env_t_obj - - def __del__(self): - pass - - def retain_context(self): - ''' Increment the reference count of the OpenCL context object. - ''' - - retval = (lib.retain_dp_context(self._env_ptr.context)) - if(retval == -1): - _raise_driver_error("retain_dp_context", -1) - - return (self._env_ptr.context) - - def release_context(self): - ''' Increment the reference count of the OpenCL context object. - ''' - - retval = (lib.release_dp_context(self._env_ptr.context)) - if retval == -1: - _raise_driver_error("release_dp_context", -1) - - def copy_array_to_device(self, array): - ''' Accepts either a DeviceArray or a NumPy ndarray and copies the - data from host to an OpenCL device buffer. Returns either the - DeviceArray that was passed in as an argument, or for the case of - ndarrays returns a new DeviceArray. - - If the function is called with a DeviceArray argument, the - function performs a blocking write of the data from the - DeviceArray's ndarray member into its OpenCL device buffer member. - When the function is called with an ndarray argument is, a new - DeviceArray is first created. The data copy operation is then - performed on the new DeviceArray. - ''' - - if isinstance(array, DeviceArray): - retval = (lib.write_dp_mem_buffer_to_device( - self._env_ptr, - array.get_buffer_obj()[0], - True, - 0, - array.get_buffer_size(), - array.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("write_dp_mem_buffer_to_device", -1) - return array - elif (isinstance(array, ndarray) or getattr(array, '__module__', None) - == "ctypes"): - dArr = DeviceArray(self._env_ptr, array) - retval = (lib.write_dp_mem_buffer_to_device( - self._env_ptr, - dArr.get_buffer_obj()[0], - True, - 0, - dArr.get_buffer_size(), - dArr.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("write_dp_mem_buffer_to_device", -1) - return dArr - else: - _raise_unsupported_type_error("copy_array_to_device") - - def copy_array_from_device(self, array): - ''' Copies data from a cl_mem buffer into a DeviceArray's host memory - pointer. The function argument should be a DeviceArray object. - ''' - - if not isinstance(array, DeviceArray): - _raise_unsupported_type_error("copy_array_to_device") - retval = (lib.read_dp_mem_buffer_from_device( - self._env_ptr, - array.get_buffer_obj()[0], - True, - 0, - array.get_buffer_size(), - array.get_data_ptr())) - if retval == -1: - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("read_dp_mem_buffer_from_device", -1) - - def create_device_array(self, array): - ''' Returns an new DeviceArray instance. 
- ''' - - if not ((isinstance(array, ndarray) or - getattr(array, '__module__', None) - == "ctypes")): - _raise_unsupported_type_error("alloc_array_in_device") - - return DeviceArray(self._env_ptr, array) - - def device_support_int64_atomics(self): - ''' Returns True current device supports 64-bit int atomic operations - ''' - - return self._env_ptr.support_int64_atomics - - def device_support_float64_atomics(self): - ''' Returns True if current device supports 64-bit float atomic operations - ''' - - return self._env_ptr.support_float64_atomics - - def get_context_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_context object. - ''' - - return self._env_ptr.context - - def get_device_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_device object. - ''' - - return self._env_ptr.device - - def get_queue_ptr(self): - ''' Returns a cdata wrapper for the OpenCL cl_command_queue object. - ''' - - return self._env_ptr.queue - - def get_env_ptr(self): - ''' Returns a cdata wrapper for a C object encapsulating an OpenCL - cl_device object, a cl_command_queue object, - and a cl_context object. - ''' - - return self._env_ptr - - def get_max_work_item_dims(self): - ''' Returns the maximum number of work items per work group for - the OpenCL device. - ''' - - return self._env_ptr.max_work_item_dims - - def get_max_work_group_size(self): - ''' Returns the max work group size for the OpenCL device. - ''' - - return self._env_ptr.max_work_group_size - - def dump(self): - ''' Prints metadata for the underlying OpenCL device. - ''' - - retval = self._env_ptr[0].dump_fn(self._env_ptr) - if retval == -1: - _raise_driver_error("env dump_fn", -1) - return retval - -########################################################################## -# Runtime class -########################################################################## - - -class Runtime(): - '''Runtime is a singleton class that creates a C wrapper object storing - available OpenCL contexts and corresponding OpenCL command queues. The - context and the queue are stored only for the first available GPU and CPU - OpenCL devices found on the system. 
- ''' - - _singleton = None - - def __new__(cls): - obj = cls._singleton - if obj is not None: - return obj - else: - obj = object.__new__(cls) - - cls._lib = lib - cls._ffi = ffi - - ffiobj = ffi.new("runtime_t *") - retval = (lib.create_dp_runtime(ffiobj)) - if(retval): - _logger.warning("OpenCL Error Code : %s", retval) - _raise_driver_error("create_dp_runtime", -1) - - cls._runtime = ffiobj - - if cls._runtime[0][0].has_cpu: - cls._cpu_device = DeviceEnv(cls._runtime[0][0].first_cpu_env) - else: - cls._cpu_device = None - _logger.warning("No CPU device") - - if cls._runtime[0][0].has_gpu: - cls._gpu_device = DeviceEnv(cls._runtime[0][0].first_gpu_env) - else: - cls._gpu_device = None - _logger.warning("No GPU device") - - cls._curr_device = DeviceEnv(cls._runtime[0][0].curr_env) - cls._singleton = obj - - return obj - - def __init__(self): - pass - - def __del__(self): - if self._runtime: - retval = (self._lib.destroy_dp_runtime(self._runtime)) - if(retval): - _raise_driver_error("destroy_dp_runtime", -1) - - def has_cpu_device(self): - ''' Returns True is the system has an OpenCL driver for the CPU.''' - - return self._cpu_device is not None - - def has_gpu_device(self): - ''' Returns True is the system has an OpenCL driver for the GPU.''' - - return self._gpu_device is not None - - - def get_cpu_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - CPU context. - ''' - - if(self._cpu_device is None): - _raise_device_not_found_error("get_cpu_device") - - return self._cpu_device - - def get_gpu_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - GPU context. - ''' - - if(self._gpu_device is None): - _raise_device_not_found_error("get_gpu_device") - - return self._gpu_device - - def get_current_device(self): - ''' Returns a cdata wrapper for the first available OpenCL - CPU context. - ''' - - return self._curr_device - - def get_runtime_ptr(self): - ''' Returns a reference to the runtime object. - ''' - - return self._runtime[0] - - def dump(self): - ''' Prints OpenCL metadata about the available devices and contexts. - ''' - - retval = self._runtime[0].dump_fn(Runtime._runtime[0]) - if retval == -1: - _raise_driver_error("runtime dump_fn", -1) - return retval - -########################################################################## -# Public API -########################################################################## - -#------- Global Data - -runtime = Runtime() -has_cpu_device = runtime.has_cpu_device() -has_gpu_device = runtime.has_gpu_device() - -#------- Global Functions - -def enqueue_kernel (device_env, kernel, kernelargs, global_work_size, - local_work_size): - ''' A single wrapper function over OpenCL clCreateKernelArgs and - clEnqueueNDRangeKernel. The function blocks till the enqueued kernel - finishes execution. 
- ''' - - l_work_size_array = None - kernel_arg_array = ffi.new("kernel_arg_t [" + str(len(kernelargs)) + "]") - g_work_size_array = ffi.new("size_t [" + str(len(global_work_size)) + "]") - if local_work_size: - l_work_size_array = ffi.new( - "size_t [" + str(len(local_work_size)) + "]") - else: - l_work_size_array = ffi.NULL - for i in range(len(kernelargs)): - kernel_arg_array[i] = kernelargs[i].get_kernel_arg_obj() - for i in range(len(global_work_size)): - g_work_size_array[i] = global_work_size[i] - for i in range(len(local_work_size)): - l_work_size_array[i] = local_work_size[i] - retval = (lib.set_args_and_enqueue_dp_kernel(device_env.get_env_ptr(), - kernel.get_kernel_t_obj(), - len(kernelargs), - kernel_arg_array, - len(global_work_size), - ffi.NULL, - g_work_size_array, - l_work_size_array)) - if(retval): - _raise_driver_error("set_args_and_enqueue_dp_kernel", -1) - - -def is_available(): - ''' Return a Boolean to indicate the availability of a DPPL device. - ''' - - return runtime.has_cpu_device() or runtime.has_gpu_device() - - -def dppl_error(): - ''' Raised a DpplDriverError exception. - ''' - - _raise_driver_error() - - -########################################################################## -# Context Managers -########################################################################## - - -@contextmanager -def igpu_context(*args, **kwds): - ''' A context manager sets the current DeviceEnv inside the global - runtime object to the default GPU DeviceEnv. The GPU DeviceEnv is - yielded by the context manager. - ''' - - device_id = 0 - # some validation code - if(args): - assert(len(args) == 1 and args[0] == 0) - _logger.debug("Set the current env to igpu device queue %s", device_id) - lib.set_curr_env(runtime.get_runtime_ptr(), - runtime.get_gpu_device().get_env_ptr()) - device_env = runtime.get_current_device() - yield device_env - - # After yield as the exit method - #TODO : one exit reset the current env to previous value - _logger.debug("Exit method called") - - -@contextmanager -def cpu_context(*args, **kwds): - ''' A context manager sets the current DeviceEnv inside the global - runtime object to the default CPU DeviceEnv. The CPU DeviceEnv is - yielded by the context manager. - ''' - - device_id = 0 - # some validation code - if(args): - assert(len(args) == 1 and args[0] == 0) - _logger.debug("Set the current env to cpu device queue %s", device_id) - lib.set_curr_env(runtime.get_runtime_ptr(), - runtime.get_cpu_device().get_env_ptr()) - device_env = runtime.get_current_device() - yield device_env - - # After yield as the exit method - _logger.debug("Exit method called") diff --git a/dpctl/opencl_core.py b/dpctl/opencl_core.py deleted file mode 100644 index 8b0a14e0c6..0000000000 --- a/dpctl/opencl_core.py +++ /dev/null @@ -1,79 +0,0 @@ -##===--------- opencl_core.py - dpctl.ocldrv interface -----*- Python -*---===## -## -## Data paraller Control (dpctl) -## -## Copyright 2020 Intel Corporation -## -## Licensed under the Apache License, Version 2.0 (the "License"); -## you may not use this file except in compliance with the License. -## You may obtain a copy of the License at -## -## http://www.apache.org/licenses/LICENSE-2.0 -## -## Unless required by applicable law or agreed to in writing, software -## distributed under the License is distributed on an "AS IS" BASIS, -## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -## See the License for the specific language governing permissions and -## limitations under the License. 
-## -##===----------------------------------------------------------------------===## -### -### \file -### This file implements a CFFI interface for dppl_opencl_interface.h -### functions. -##===----------------------------------------------------------------------===## - -import os - -from cffi import FFI - - -ffi = FFI() - -dppl_opencl_interface_incldir = os.environ.get( - 'DPPL_OPENCL_INTERFACE_INCLDIR', None) -dppl_opencl_interface_libdir = os.environ.get( - 'DPPL_OPENCL_INTERFACE_LIBDIR', None) -opencl_libdir = os.environ.get('OpenCL_LIBDIR', None) - -if opencl_libdir is None: - raise ValueError("Abort! Set the OpenCL_LIBDIR envar to point to " - "an OpenCL ICD") - -if dppl_opencl_interface_libdir is None: - raise ValueError("Abort! Set the DPPL_OPENCL_INTERFACE_LIBDIR envar to " - "point to ibdplibdpglueglue.so") - -if dppl_opencl_interface_incldir is None: - raise ValueError("Abort! Set the DP_GLUE_INCLDIR envar to point to " - "dppl_opencl_interface.h") - -glue_h = ''.join(list(filter(lambda x: len(x) > 0 and x[0] != "#", - open(dppl_opencl_interface_incldir + - '/dppl_opencl_interface.h', 'r') - .readlines()))).replace('DPPL_API', '') - -# cdef() expects a single string declaring the C types, functions and -# globals needed to use the shared object. It must be in valid C syntax. -ffi.cdef(glue_h) - -ffi_lib_name = "dpctl._opencl_core" - -import sys -IS_WIN = sys.platform in ['win32', 'cygwin'] -del sys - -ffi.set_source( - ffi_lib_name, - """ - #include "dppl_opencl_interface.h" // the C header of the library - """, - include_dirs=[dppl_opencl_interface_incldir], - library_dirs=[dppl_opencl_interface_libdir, opencl_libdir], - extra_link_args=[] if IS_WIN else ['-Wl,-rpath=$ORIGIN'], - libraries=["DPPLOpenCLInterface", "OpenCL"], -) # library name, for the linker -del IS_WIN - -if __name__ == "__main__": - ffi.compile(verbose=True) diff --git a/dpctl/tests/__init__.py b/dpctl/tests/__init__.py index f04131d53a..a53980d17a 100644 --- a/dpctl/tests/__init__.py +++ b/dpctl/tests/__init__.py @@ -23,6 +23,7 @@ ##===----------------------------------------------------------------------===## from .test_dump_functions import * +from .test_sycl_device import * from .test_sycl_kernel_submit import * from .test_sycl_program import * from .test_sycl_queue import * diff --git a/dpctl/tests/test_dump_functions.py b/dpctl/tests/test_dump_functions.py index 52b41edace..07c4b2d09b 100644 --- a/dpctl/tests/test_dump_functions.py +++ b/dpctl/tests/test_dump_functions.py @@ -25,29 +25,31 @@ import unittest import dpctl -import dpctl.ocldrv as drv class TestDumpMethods(unittest.TestCase): - - def test_dpctl_dump (self): + def test_dpctl_dump(self): try: dpctl.dump() except Exception: self.fail("Encountered an exception inside dump().") - def test_dpctl_dump_device_info (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_dpctl_dump_device_info(self): q = dpctl.get_current_queue() try: q.get_sycl_device().dump_device_info() except Exception: self.fail("Encountered an exception inside dump_device_info().") - def test_dpctl_ocldrv_dump (self): - try: - dpctl.ocldrv.runtime.dump() - except Exception: - self.fail("Encountered an exception inside dump_device_info().") + # def test_dpctl_ocldrv_dump (self): + # try: + # dpctl.ocldrv.runtime.dump() + # except Exception: + # self.fail("Encountered an exception inside dump_device_info().") + -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py new file mode 100644 index 0000000000..e222a55542 --- /dev/null +++ b/dpctl/tests/test_sycl_device.py @@ -0,0 +1,110 @@ +##===------------- test_sycl_device.py - dpctl -------*- Python -*---------===## +## +## Data Parallel Control (dpctl) +## +## Copyright 2020 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +##===----------------------------------------------------------------------===## +## +## \file +## Defines unit test cases for the SyclDevice classes defined in sycl_core.pyx. +##===----------------------------------------------------------------------===## + +import dpctl +import unittest + + +@unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") +class TestSyclDevice(unittest.TestCase): + def test_get_max_compute_units(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_compute_units = q.get_sycl_device().get_max_compute_units() + except Exception: + self.fail("Encountered an exception inside get_max_compute_units().") + self.assertTrue(max_compute_units > 0) + + def test_get_max_work_item_dims(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_item_dims = q.get_sycl_device().get_max_work_item_dims() + except Exception: + self.fail("Encountered an exception inside get_max_work_item_dims().") + self.assertTrue(max_work_item_dims > 0) + + def test_get_max_work_item_sizes(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_item_sizes = q.get_sycl_device().get_max_work_item_sizes() + except Exception: + self.fail("Encountered an exception inside get_max_work_item_sizes().") + self.assertNotEqual(max_work_item_sizes, (None, None, None)) + + def test_get_max_work_group_size(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_work_group_size = q.get_sycl_device().get_max_work_group_size() + except Exception: + self.fail("Encountered an exception inside get_max_work_group_size().") + self.assertTrue(max_work_group_size > 0) + + def test_get_max_num_sub_groups(self): + try: + q 
= dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + max_num_sub_groups = q.get_sycl_device().get_max_num_sub_groups() + except Exception: + self.fail("Encountered an exception inside get_max_num_sub_groups().") + self.assertTrue(max_num_sub_groups > 0) + + def test_has_int64_base_atomics(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + aspects_base_atomics = q.get_sycl_device().has_int64_base_atomics() + except Exception: + self.fail("Encountered an exception inside has_int64_base_atomics().") + self.assertNotEqual(aspects_base_atomics, False) + + def test_has_int64_extended_atomics(self): + try: + q = dpctl.get_current_queue() + except Exception: + self.fail("Encountered an exception inside get_current_queue().") + try: + aspects_extended_atomics = q.get_sycl_device().has_int64_extended_atomics() + except Exception: + self.fail("Encountered an exception inside has_int64_extended_atomics().") + self.assertNotEqual(aspects_extended_atomics, False) + + +if __name__ == "__main__": + unittest.main() diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index fd07de1fee..772eb15042 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -28,10 +28,10 @@ import dpctl._memory as dpctl_mem import numpy as np -@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class Test1DKernelSubmit (unittest.TestCase): - def test_create_program_from_source (self): +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") +class Test1DKernelSubmit(unittest.TestCase): + def test_create_program_from_source(self): oclSrc = " \ kernel void axpy(global int* a, global int* b, global int* c, int d) { \ size_t index = get_global_id(0); \ @@ -40,14 +40,14 @@ def test_create_program_from_source (self): with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() prog = dpctl.create_program_from_source(q, oclSrc) - axpyKernel = prog.get_sycl_kernel('axpy') + axpyKernel = prog.get_sycl_kernel("axpy") - abuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - bbuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - cbuf = dpctl_mem.MemoryUSMShared(1024*np.dtype('i').itemsize) - a = np.ndarray((1024), buffer=abuf, dtype='i') - b = np.ndarray((1024), buffer=bbuf, dtype='i') - c = np.ndarray((1024), buffer=cbuf, dtype='i') + abuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + bbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + cbuf = dpctl_mem.MemoryUSMShared(1024 * np.dtype("i").itemsize) + a = np.ndarray((1024), buffer=abuf, dtype="i") + b = np.ndarray((1024), buffer=bbuf, dtype="i") + c = np.ndarray((1024), buffer=cbuf, dtype="i") a[:] = np.arange(1024) b[:] = np.arange(1024, 0, -1) c[:] = 0 @@ -62,8 +62,8 @@ def test_create_program_from_source (self): r = [1024] q.submit(axpyKernel, args, r) - self.assertTrue(np.allclose(c, a*d + b)) + self.assertTrue(np.allclose(c, a * d + b)) -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index c59ea631ed..0e695bf6bb 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -27,10 +27,10 @@ import unittest import os -@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class 
TestProgramFromOCLSource (unittest.TestCase): - def test_create_program_from_source (self): +@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") +class TestProgramFromOCLSource(unittest.TestCase): + def test_create_program_from_source(self): oclSrc = " \ kernel void add(global int* a, global int* b, global int* c) { \ size_t index = get_global_id(0); \ @@ -48,62 +48,59 @@ def test_create_program_from_source (self): self.assertTrue(prog.has_sycl_kernel("add")) self.assertTrue(prog.has_sycl_kernel("axpy")) - addKernel = prog.get_sycl_kernel('add') - axpyKernel = prog.get_sycl_kernel('axpy') + addKernel = prog.get_sycl_kernel("add") + axpyKernel = prog.get_sycl_kernel("axpy") - self.assertEqual(addKernel.get_function_name(),"add") - self.assertEqual(axpyKernel.get_function_name(),"axpy") + self.assertEqual(addKernel.get_function_name(), "add") + self.assertEqual(axpyKernel.get_function_name(), "axpy") self.assertEqual(addKernel.get_num_args(), 3) self.assertEqual(axpyKernel.get_num_args(), 4) @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") -class TestProgramFromSPRIV (unittest.TestCase): - +class TestProgramFromSPRIV(unittest.TestCase): def test_create_program_from_spirv(self): CURR_DIR = os.path.dirname(os.path.abspath(__file__)) - spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') - with open(spirv_file, 'rb') as fin: + spirv_file = os.path.join(CURR_DIR, "input_files/multi_kernel.spv") + with open(spirv_file, "rb") as fin: spirv = fin.read() with dpctl.device_context("opencl:gpu:0"): q = dpctl.get_current_queue() - prog = dpctl.create_program_from_spirv(q,spirv) + prog = dpctl.create_program_from_spirv(q, spirv) self.assertIsNotNone(prog) self.assertTrue(prog.has_sycl_kernel("add")) self.assertTrue(prog.has_sycl_kernel("axpy")) - addKernel = prog.get_sycl_kernel('add') - axpyKernel = prog.get_sycl_kernel('axpy') + addKernel = prog.get_sycl_kernel("add") + axpyKernel = prog.get_sycl_kernel("axpy") - self.assertEqual(addKernel.get_function_name(),"add") - self.assertEqual(axpyKernel.get_function_name(),"axpy") + self.assertEqual(addKernel.get_function_name(), "add") + self.assertEqual(axpyKernel.get_function_name(), "axpy") self.assertEqual(addKernel.get_num_args(), 3) self.assertEqual(axpyKernel.get_num_args(), 4) + @unittest.skipUnless( dpctl.has_gpu_queues(backend_ty=dpctl.backend_type.level_zero), - "No Level0 GPU queues available" + "No Level0 GPU queues available", ) -class TestProgramForLevel0GPU (unittest.TestCase): - +class TestProgramForLevel0GPU(unittest.TestCase): def test_create_program_from_spirv(self): CURR_DIR = os.path.dirname(os.path.abspath(__file__)) - spirv_file = os.path.join(CURR_DIR, 'input_files/multi_kernel.spv') - with open(spirv_file, 'rb') as fin: + spirv_file = os.path.join(CURR_DIR, "input_files/multi_kernel.spv") + with open(spirv_file, "rb") as fin: spirv = fin.read() with dpctl.device_context("level0:gpu:0"): q = dpctl.get_current_queue() try: - prog = dpctl.create_program_from_spirv(q,spirv) - self.fail( - "Tried to create program for an unsupported Level0 GPU." 
- ) + prog = dpctl.create_program_from_spirv(q, spirv) + self.fail("Tried to create program for an unsupported Level0 GPU.") except ValueError: pass - def test_create_program_from_source (self): + def test_create_program_from_source(self): oclSrc = " \ kernel void add(global int* a, global int* b, global int* c) { \ size_t index = get_global_id(0); \ @@ -117,12 +114,10 @@ def test_create_program_from_source (self): q = dpctl.get_current_queue() try: prog = dpctl.create_program_from_source(q, oclSrc) - self.fail( - "Tried to create program for an unsupported Level0 GPU." - ) + self.fail("Tried to create program for an unsupported Level0 GPU.") except ValueError: pass -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index 92a0dafab4..317685c9f3 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -25,25 +25,21 @@ import dpctl import unittest -class TestSyclQueue (unittest.TestCase): - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_queue_not_equals (self): + +class TestSyclQueue(unittest.TestCase): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_queue_not_equals(self): with dpctl.device_context("opencl:gpu") as gpuQ0: with dpctl.device_context("opencl:cpu") as cpuQ: self.assertFalse(cpuQ.equals(gpuQ0)) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_queue_equals (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_queue_equals(self): with dpctl.device_context("opencl:gpu") as gpuQ0: with dpctl.device_context("opencl:gpu") as gpuQ1: self.assertTrue(gpuQ0.equals(gpuQ1)) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue_manager.py b/dpctl/tests/test_sycl_queue_manager.py index 0427daf73f..c4b0408173 100644 --- a/dpctl/tests/test_sycl_queue_manager.py +++ b/dpctl/tests/test_sycl_queue_manager.py @@ -26,23 +26,22 @@ import unittest -class TestGetNumPlatforms (unittest.TestCase): - @unittest.skipIf(not dpctl.has_sycl_platforms(), - "No SYCL platforms available") - def test_dpctl_get_num_platforms (self): - if(dpctl.has_sycl_platforms): +class TestGetNumPlatforms(unittest.TestCase): + @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") + def test_dpctl_get_num_platforms(self): + if dpctl.has_sycl_platforms: self.assertGreaterEqual(dpctl.get_num_platforms(), 1) @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestDumpMethods (unittest.TestCase): - def test_dpctl_dump (self): +class TestDumpMethods(unittest.TestCase): + def test_dpctl_dump(self): try: dpctl.dump() except Exception: self.fail("Encountered an exception inside dump().") - def test_dpctl_dump_device_info (self): + def test_dpctl_dump_device_info(self): q = dpctl.get_current_queue() try: q.get_sycl_device().dump_device_info() @@ -51,25 +50,18 @@ def test_dpctl_dump_device_info (self): @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestIsInDeviceContext (unittest.TestCase): - - def test_is_in_device_context_outside_device_ctxt (self): +class TestIsInDeviceContext(unittest.TestCase): + def 
test_is_in_device_context_outside_device_ctxt(self): self.assertFalse(dpctl.is_in_device_context()) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_is_in_device_context_inside_device_ctxt (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_is_in_device_context_inside_device_ctxt(self): with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_is_in_device_context_inside_nested_device_ctxt (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_is_in_device_context_inside_nested_device_ctxt(self): with dpctl.device_context("opencl:cpu:0"): with dpctl.device_context("opencl:gpu:0"): self.assertTrue(dpctl.is_in_device_context()) @@ -78,72 +70,52 @@ def test_is_in_device_context_inside_nested_device_ctxt (self): @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestIsInDeviceContext (unittest.TestCase): - - def test_get_current_device_type_outside_device_ctxt (self): +class TestGetCurrentDevice(unittest.TestCase): + def test_get_current_device_type_outside_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) - def test_get_current_device_type_inside_device_ctxt (self): + def test_get_current_device_type_inside_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.gpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) self.assertNotEqual(dpctl.get_current_device_type(), None) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_get_current_device_type_inside_nested_device_ctxt (self): + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_get_current_device_type_inside_nested_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:cpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.cpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.gpu - ) - self.assertEqual( - dpctl.get_current_device_type(), dpctl.device_type.cpu - ) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) + self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) self.assertNotEqual(dpctl.get_current_device_type(), None) @unittest.skipIf(not dpctl.has_sycl_platforms(), "No SYCL platforms available") -class TestGetCurrentQueueInMultipleThreads (unittest.TestCase): - - def test_num_current_queues_outside_with_clause (self): +class TestGetCurrentQueueInMultipleThreads(unittest.TestCase): + def test_num_current_queues_outside_with_clause(self): self.assertEqual(dpctl.get_num_activated_queues(), 0) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_num_current_queues_inside_with_clause (self): + 
@unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_num_current_queues_inside_with_clause(self): with dpctl.device_context("opencl:cpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 2) self.assertEqual(dpctl.get_num_activated_queues(), 0) + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_num_current_queues_inside_threads(self): + from threading import Thread - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_num_current_queues_inside_threads (self): - from threading import Thread, local - def SessionThread (self): + def SessionThread(self): self.assertEqual(dpctl.get_num_activated_queues(), 0) with dpctl.device_context("opencl:gpu:0"): self.assertEqual(dpctl.get_num_activated_queues(), 1) @@ -156,5 +128,5 @@ def SessionThread (self): Session2.start() -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index 6cb95ea63e..ad4cdaf92d 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -26,16 +26,17 @@ import unittest - -class TestQueueMemcpy (unittest.TestCase): - - def _create_memory (self): +class TestQueueMemcpy(unittest.TestCase): + def _create_memory(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = dpctl._memory.MemoryUSMShared(nbytes, queue) return mobj - def test_memcpy_copy_usm_to_usm (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memcpy_copy_usm_to_usm(self): mobj1 = self._create_memory() mobj2 = self._create_memory() q = dpctl.get_current_queue() @@ -43,13 +44,16 @@ def test_memcpy_copy_usm_to_usm (self): mv1 = memoryview(mobj1) mv2 = memoryview(mobj2) - mv1[:3] = b'123' + mv1[:3] = b"123" q.memcpy(mobj2, mobj1, 3) - self.assertEqual(mv2[:3], b'123') + self.assertEqual(mv2[:3], b"123") - def test_memcpy_type_error (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memcpy_type_error(self): mobj = self._create_memory() q = dpctl.get_current_queue() @@ -66,5 +70,5 @@ def test_memcpy_type_error (self): self.assertEqual(str(cm.exception), "Parameter src should be Memory.") -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index 6a1d5ddc2f..6f938028ce 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -27,30 +27,33 @@ from dpctl._memory import MemoryUSMShared, MemoryUSMHost, MemoryUSMDevice -class TestMemory (unittest.TestCase): - - def test_memory_create (self): +class TestMemory(unittest.TestCase): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_memory_create(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = MemoryUSMShared(nbytes, queue) self.assertEqual(mobj.nbytes, nbytes) - def _create_memory (self): + def _create_memory(self): nbytes = 1024 queue = dpctl.get_current_queue() mobj = MemoryUSMShared(nbytes, queue) return mobj - def test_memory_without_context (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_memory_without_context(self): mobj = self._create_memory() # Without context - self.assertEqual(mobj._usm_type(), 'shared') + self.assertEqual(mobj._usm_type(), "shared") - @unittest.skipUnless( - dpctl.has_cpu_queues(), "No OpenCL CPU queues available" - ) - def test_memory_cpu_context (self): + @unittest.skipUnless(dpctl.has_cpu_queues(), "No OpenCL CPU queues available") + def test_memory_cpu_context(self): mobj = self._create_memory() # CPU context @@ -58,31 +61,31 @@ def test_memory_cpu_context (self): # type respective to the context in which # memory was created usm_type = mobj._usm_type() - self.assertEqual(usm_type, 'shared') + self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() # type as view from current queue usm_type = mobj._usm_type(current_queue) # type can be unknown if current queue is # not in the same SYCL context - self.assertTrue(usm_type in ['unknown', 'shared']) + self.assertTrue(usm_type in ["unknown", "shared"]) - @unittest.skipUnless( - dpctl.has_gpu_queues(), "No OpenCL GPU queues available" - ) - def test_memory_gpu_context (self): + @unittest.skipUnless(dpctl.has_gpu_queues(), "No OpenCL GPU queues available") + def test_memory_gpu_context(self): mobj = self._create_memory() # GPU context with dpctl.device_context("opencl:gpu:0"): usm_type = mobj._usm_type() - self.assertEqual(usm_type, 'shared') + self.assertEqual(usm_type, "shared") current_queue = dpctl.get_current_queue() usm_type = mobj._usm_type(current_queue) - self.assertTrue(usm_type in ['unknown', 'shared']) - + self.assertTrue(usm_type in ["unknown", "shared"]) - def test_buffer_protocol (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_buffer_protocol(self): mobj = self._create_memory() mv1 = memoryview(mobj) mv2 = memoryview(mobj) @@ -95,13 +98,19 @@ class TestMemoryUSMBase: MemoryUSMClass = None usm_type = None - def test_create_with_queue (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." + ) + def test_create_with_queue(self): q = dpctl.get_current_queue() m = self.MemoryUSMClass(1024, q) self.assertEqual(m.nbytes, 1024) self.assertEqual(m._usm_type(), self.usm_type) - def test_create_without_queue (self): + @unittest.skipUnless( + dpctl.has_sycl_platforms(), "No SYCL devices except the default host device." 
+ ) + def test_create_without_queue(self): m = self.MemoryUSMClass(1024) self.assertEqual(m.nbytes, 1024) self.assertEqual(m._usm_type(), self.usm_type) @@ -111,22 +120,22 @@ class TestMemoryUSMShared(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMShared """ MemoryUSMClass = MemoryUSMShared - usm_type = 'shared' + usm_type = "shared" class TestMemoryUSMHost(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMHost """ MemoryUSMClass = MemoryUSMHost - usm_type = 'host' + usm_type = "host" class TestMemoryUSMDevice(TestMemoryUSMBase, unittest.TestCase): """ Tests for MemoryUSMDevice """ MemoryUSMClass = MemoryUSMDevice - usm_type = 'device' + usm_type = "device" -if __name__ == '__main__': +if __name__ == "__main__": unittest.main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..2b233b61c9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,2 @@ +[tool.black] +exclude = 'versioneer.py' diff --git a/scripts/build_for_develop.sh b/scripts/build_for_develop.sh index 3d54402846..5144312eee 100755 --- a/scripts/build_for_develop.sh +++ b/scripts/build_for_develop.sh @@ -34,10 +34,6 @@ cp install/lib/*.so dpctl/ mkdir -p dpctl/include cp -r backends/include/* dpctl/include -export DPPL_OPENCL_INTERFACE_LIBDIR=dpctl -export DPPL_OPENCL_INTERFACE_INCLDIR=dpctl/include -# /usr/lib/x86_64-linux-gnu/ -export OpenCL_LIBDIR=${DPCPP_ROOT}/lib export DPPL_SYCL_INTERFACE_LIBDIR=dpctl export DPPL_SYCL_INTERFACE_INCLDIR=dpctl/include diff --git a/setup.py b/setup.py index 51551e099d..d4634e700a 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ ##===---------- setup.py - dpctl.ocldrv interface -----*- Python -*-----===## ## -## Python Data Parallel Processing Library (PyDPPL) +## Data Parallel Control Library (dpCtl) ## ## Copyright 2020 Intel Corporation ## @@ -32,51 +32,61 @@ import numpy as np requirements = [ - 'cffi>=1.0.0', - 'cython', + "cython", ] IS_WIN = False IS_MAC = False IS_LIN = False -if 'linux' in sys.platform: +if "linux" in sys.platform: IS_LIN = True -elif sys.platform == 'darwin': +elif sys.platform == "darwin": IS_MAC = True -elif sys.platform in ['win32', 'cygwin']: +elif sys.platform in ["win32", "cygwin"]: IS_WIN = True else: - assert False, sys.platform + ' not supported' + assert False, sys.platform + " not supported" + +dppl_sycl_interface_lib = os.environ["DPPL_SYCL_INTERFACE_LIBDIR"] +dppl_sycl_interface_include = os.environ["DPPL_SYCL_INTERFACE_INCLDIR"] +sycl_lib = os.environ["ONEAPI_ROOT"] + "\compiler\latest\windows\lib" -dppl_sycl_interface_lib = os.environ['DPPL_SYCL_INTERFACE_LIBDIR'] -dppl_sycl_interface_include = os.environ['DPPL_SYCL_INTERFACE_INCLDIR'] -sycl_lib = os.environ['ONEAPI_ROOT']+"\compiler\latest\windows\lib" def get_sdl_cflags(): if IS_LIN or IS_MAC: - return ['-fstack-protector', '-fPIC', - '-D_FORTIFY_SOURCE=2', '-Wformat', '-Wformat-security',] + return [ + "-fstack-protector", + "-fPIC", + "-D_FORTIFY_SOURCE=2", + "-Wformat", + "-Wformat-security", + ] elif IS_WIN: return [] + def get_sdl_ldflags(): if IS_LIN: - return ['-Wl,-z,noexecstack,-z,relro,-z,now',] + return [ + "-Wl,-z,noexecstack,-z,relro,-z,now", + ] elif IS_MAC: return [] elif IS_WIN: - return ['/NXCompat', '/DynamicBase'] + return ["/NXCompat", "/DynamicBase"] + def get_other_cxxflags(): if IS_LIN: - return ['-O3', '-fsycl', '-std=c++17'] + return ["-O3", "-fsycl", "-std=c++17"] elif IS_MAC: return [] elif IS_WIN: # FIXME: These are specific to MSVC and we should first make sure # what compiler we are using. 
- return ['/Ox', '/std:c++17'] + return ["/Ox", "/std:c++17"] + def extensions(): # Security flags @@ -86,11 +96,11 @@ def extensions(): librarys = [] if IS_LIN: - libs += ['rt', 'DPPLSyclInterface'] + libs += ["rt", "DPPLSyclInterface"] elif IS_MAC: pass elif IS_WIN: - libs += ['DPPLSyclInterface', 'sycl'] + libs += ["DPPLSyclInterface", "sycl"] if IS_LIN: librarys = [dppl_sycl_interface_lib] @@ -105,47 +115,57 @@ def extensions(): runtime_library_dirs = [] extension_args = { - "depends": [dppl_sycl_interface_include,], + "depends": [ + dppl_sycl_interface_include, + ], "include_dirs": [np.get_include(), dppl_sycl_interface_include], "extra_compile_args": eca + get_other_cxxflags(), "extra_link_args": ela, "libraries": libs, "library_dirs": librarys, "runtime_library_dirs": runtime_library_dirs, - "language": 'c++', + "language": "c++", } extensions = [ - Extension('dpctl._sycl_core', [os.path.join('dpctl', 'sycl_core.pyx'),], - **extension_args), - Extension('dpctl._memory', [os.path.join('dpctl', '_memory.pyx'),], - **extension_args), + Extension( + "dpctl._sycl_core", + [ + os.path.join("dpctl", "_sycl_core.pyx"), + ], + **extension_args + ), + Extension( + "dpctl._memory", + [ + os.path.join("dpctl", "_memory.pyx"), + ], + **extension_args + ), ] exts = cythonize(extensions) return exts + setup( - name='dpctl', + name="dpctl", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description="A lightweight Python wrapper for a subset of OpenCL and SYCL.", license="Apache 2.0", author="Intel Corporation", - url='https://github.com/IntelPython/dpCtl', + url="https://github.com/IntelPython/dpCtl", packages=find_packages(include=["*"]), include_package_data=True, - ext_modules = extensions(), + ext_modules=extensions(), setup_requires=requirements, - cffi_modules=[ - "./dpctl/opencl_core.py:ffi" - ], install_requires=requirements, - keywords='dpctl', + keywords="dpctl", classifiers=[ "Development Status :: 3 - Alpha", - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ] + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], )
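
Note: the new `dpctl/tests/test_sycl_device.py` introduced above exercises the device-descriptor accessors on the object returned by `get_sycl_device()`. As a rough usage sketch outside the unittest harness (assuming a dpctl build from this changeset and at least one available SYCL platform), the same queries can be made directly; only names that appear in the tests above are used here.

```python
# Minimal sketch of the device queries covered by test_sycl_device.py.
# Assumes a dpctl build from this changeset and at least one SYCL platform.
import dpctl

if dpctl.has_sycl_platforms():
    q = dpctl.get_current_queue()
    dev = q.get_sycl_device()

    # Compute-unit and work-group limits checked by the new tests
    print("max_compute_units:   ", dev.get_max_compute_units())
    print("max_work_item_dims:  ", dev.get_max_work_item_dims())
    print("max_work_item_sizes: ", dev.get_max_work_item_sizes())
    print("max_work_group_size: ", dev.get_max_work_group_size())
    print("max_num_sub_groups:  ", dev.get_max_num_sub_groups())

    # int64 atomics support (aspects)
    print("int64 base atomics:    ", dev.has_int64_base_atomics())
    print("int64 extended atomics:", dev.has_int64_extended_atomics())
else:
    print("No SYCL platforms available")
```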